/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 * NOTE........: sboxes for maxwell were taken from DeepLearningJohnDoe, license below
 *             : sboxes for others were taken from JtR, license below
 */

#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#endif

// Include paths (built with M2S from INCLUDE_PATH) of the single-hash and
// multi-hash bitslice comparison snippets.
// NOTE(review): presumably #include'd further down via these names; the use
// site is not visible in this part of the file.
#define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single_bs.cl)
#define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi_bs.cl)

// KXX_DECL expands to nothing on every vendor branch below.
// NOTE(review): it stays undefined if none of IS_NV / IS_AMD / IS_HIP /
// IS_GENERIC is set - confirm that no other platform reaches this file.
#ifdef IS_NV
#define KXX_DECL
#endif

#if (defined IS_AMD || defined IS_HIP)
#define KXX_DECL
#endif

#ifdef IS_GENERIC
#define KXX_DECL
#endif

#ifdef IS_NV

#if CUDA_ARCH >= 500

//
// Bitslice DES S-boxes with LOP3.LUT instructions
// For NVIDIA Maxwell architecture and CUDA 7.5 RC
// by DeepLearningJohnDoe, version 0.1.6, 2015/07/19
//
// Gate counts: 25 24 25 18 25 24 24 23
// Average: 23.5
// Depth: 8 7 7 6 8 10 10 8
// Average: 8
//
// Note that same S-box function with a lower gate count isn't necessarily faster.
//
// These Boolean expressions corresponding to DES S-boxes were
// discovered by <deeplearningjohndoe at gmail.com>
//
// This file itself is Copyright (c) 2015 by <deeplearningjohndoe at gmail.com>
// Redistribution and use in source and binary forms, with or without
// modification, are permitted.
//
// The underlying mathematical formulas are NOT copyrighted.
//

// LUT (a, b, c, d, e): declares a new u32 named `a` and sets it to the result
// of one PTX lop3.b32 over the operands b, c and d. `e` is the 8-bit immediate
// lookup table, pasted into the instruction text with #e; per the PTX ISA each
// result bit is bit ((b << 2) | (c << 1) | d) of e, taken bit position by bit
// position.
// The expansion is a declaration followed by a statement that carries its own
// trailing semicolon, so the macro is only usable at block scope and `a` must
// be an identifier that is not yet declared in that scope.
#define LUT(a,b,c,d,e) u32 a; asm ("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));

// Bitslice DES S-box 1, LOP3 variant: 25 LUT gates, each one lop3.b32 (see the
// LUT macro). a1..a6 are the six input words; the four result words x1..x4 are
// XORed into *out1..*out4, so the previous contents of the outputs are kept
// and combined, not overwritten.
// NOTE(review): the temporaries appear to be named after their 64-bit truth
// table for a1=0x5555.., a2=0x3333.., a3=0x0F0F.., a4=0x00FF..,
// a5=0x0000FFFF.., a6=0x00000000FFFFFFFF (spot-checked, not exhaustively).
DECLSPEC void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  LUT (xAA55AA5500550055, a1, a4, a6, 0xC1)
  LUT (xA55AA55AF0F5F0F5, a3, a6, xAA55AA5500550055, 0x9E)
  LUT (x5F5F5F5FA5A5A5A5, a1, a3, a6, 0xD6)
  LUT (xF5A0F5A0A55AA55A, a4, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x56)
  LUT (x947A947AD1E7D1E7, a2, xA55AA55AF0F5F0F5, xF5A0F5A0A55AA55A, 0x6C)
  LUT (x5FFF5FFFFFFAFFFA, a6, xAA55AA5500550055, x5F5F5F5FA5A5A5A5, 0x7B)
  LUT (xB96CB96C69936993, a2, xF5A0F5A0A55AA55A, x5FFF5FFFFFFAFFFA, 0xD6)
  LUT (x3, a5, x947A947AD1E7D1E7, xB96CB96C69936993, 0x6A)
  LUT (x55EE55EE55EE55EE, a1, a2, a4, 0x7A)
  LUT (x084C084CB77BB77B, a2, a6, xF5A0F5A0A55AA55A, 0xC9)
  LUT (x9C329C32E295E295, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x72)
  LUT (xA51EA51E50E050E0, a3, a6, x55EE55EE55EE55EE, 0x29)
  LUT (x4AD34AD3BE3CBE3C, a2, x947A947AD1E7D1E7, xA51EA51E50E050E0, 0x95)
  LUT (x2, a5, x9C329C32E295E295, x4AD34AD3BE3CBE3C, 0xC6)
  LUT (xD955D95595D195D1, a1, a2, x9C329C32E295E295, 0xD2)
  LUT (x8058805811621162, x947A947AD1E7D1E7, x55EE55EE55EE55EE, x084C084CB77BB77B, 0x90)
  LUT (x7D0F7D0FC4B3C4B3, xA51EA51E50E050E0, xD955D95595D195D1, x8058805811621162, 0x76)
  LUT (x0805080500010001, a3, xAA55AA5500550055, xD955D95595D195D1, 0x80)
  LUT (x4A964A96962D962D, xB96CB96C69936993, x4AD34AD3BE3CBE3C, x0805080500010001, 0xA6)
  LUT (x4, a5, x7D0F7D0FC4B3C4B3, x4A964A96962D962D, 0xA6)
  LUT (x148014807B087B08, a1, xAA55AA5500550055, x947A947AD1E7D1E7, 0x21)
  LUT (x94D894D86B686B68, xA55AA55AF0F5F0F5, x8058805811621162, x148014807B087B08, 0x6A)
  LUT (x5555555540044004, a1, a6, x084C084CB77BB77B, 0x70)
  LUT (xAFB4AFB4BF5BBF5B, x5F5F5F5FA5A5A5A5, xA51EA51E50E050E0, x5555555540044004, 0x97)
  LUT (x1, a5, x94D894D86B686B68, xAFB4AFB4BF5BBF5B, 0x6C)

  // every output is produced by a final gate whose first operand is a5
  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

// Bitslice DES S-box 2, LOP3 variant: 24 LUT gates, each one lop3.b32 (see the
// LUT macro). a1..a6 are the six input words; the four result words x1..x4 are
// XORed into *out1..*out4, so the previous contents of the outputs are kept
// and combined, not overwritten.
// NOTE(review): the temporaries appear to be named after their 64-bit truth
// table for a1=0x5555.., a2=0x3333.., a3=0x0F0F.., a4=0x00FF..,
// a5=0x0000FFFF.., a6=0x00000000FFFFFFFF (spot-checked, not exhaustively).
DECLSPEC void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  LUT (xEEEEEEEE99999999, a1, a2, a6, 0x97)
  LUT (xFFFFEEEE66666666, a5, a6, xEEEEEEEE99999999, 0x67)
  LUT (x5555FFFFFFFF0000, a1, a5, a6, 0x76)
  LUT (x6666DDDD5555AAAA, a2, xFFFFEEEE66666666, x5555FFFFFFFF0000, 0x69)
  LUT (x6969D3D35353ACAC, a3, xFFFFEEEE66666666, x6666DDDD5555AAAA, 0x6A)
  LUT (xCFCF3030CFCF3030, a2, a3, a5, 0x65)
  LUT (xE4E4EEEE9999F0F0, a3, xEEEEEEEE99999999, x5555FFFFFFFF0000, 0x8D)
  LUT (xE5E5BABACDCDB0B0, a1, xCFCF3030CFCF3030, xE4E4EEEE9999F0F0, 0xCA)
  LUT (x3, a4, x6969D3D35353ACAC, xE5E5BABACDCDB0B0, 0xC6)
  LUT (x3333CCCC00000000, a2, a5, a6, 0x14)
  LUT (xCCCCDDDDFFFF0F0F, a5, xE4E4EEEE9999F0F0, x3333CCCC00000000, 0xB5)
  LUT (x00000101F0F0F0F0, a3, a6, xFFFFEEEE66666666, 0x1C)
  LUT (x9A9A64646A6A9595, a1, xCFCF3030CFCF3030, x00000101F0F0F0F0, 0x96)
  LUT (x2, a4, xCCCCDDDDFFFF0F0F, x9A9A64646A6A9595, 0x6A)
  LUT (x3333BBBB3333FFFF, a1, a2, x6666DDDD5555AAAA, 0xDE)
  LUT (x1414141441410000, a1, a3, xE4E4EEEE9999F0F0, 0x90)
  LUT (x7F7FF3F3F5F53939, x6969D3D35353ACAC, x9A9A64646A6A9595, x3333BBBB3333FFFF, 0x79)
  LUT (x9494E3E34B4B3939, a5, x1414141441410000, x7F7FF3F3F5F53939, 0x29)
  LUT (x1, a4, x3333BBBB3333FFFF, x9494E3E34B4B3939, 0xA6)
  // a1 is passed as both the first and second operand, so this gate is
  // effectively a 2-input function of a1 and xE4E4EEEE9999F0F0
  LUT (xB1B1BBBBCCCCA5A5, a1, a1, xE4E4EEEE9999F0F0, 0x4A)
  LUT (xFFFFECECEEEEDDDD, a2, x3333CCCC00000000, x9A9A64646A6A9595, 0xEF)
  LUT (xB1B1A9A9DCDC8787, xE5E5BABACDCDB0B0, xB1B1BBBBCCCCA5A5, xFFFFECECEEEEDDDD, 0x8D)
  LUT (xFFFFCCCCEEEE4444, a2, a5, xFFFFEEEE66666666, 0x2B)
  LUT (x4, a4, xB1B1A9A9DCDC8787, xFFFFCCCCEEEE4444, 0x6C)

  // every output is produced by a final gate whose first operand is a4
  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

// Bitslice DES S-box 3, LOP3 variant: 25 LUT gates, each one lop3.b32 (see the
// LUT macro). a1..a6 are the six input words; the four result words x1..x4 are
// XORed into *out1..*out4, so the previous contents of the outputs are kept
// and combined, not overwritten.
// NOTE(review): the temporaries appear to be named after their 64-bit truth
// table for a1=0x5555.., a2=0x3333.., a3=0x0F0F.., a4=0x00FF..,
// a5=0x0000FFFF.., a6=0x00000000FFFFFFFF (spot-checked, not exhaustively).
DECLSPEC void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  LUT (xA50FA50FA50FA50F, a1, a3, a4, 0xC9)
  LUT (xF0F00F0FF0F0F0F0, a3, a5, a6, 0x4B)
  LUT (xAF0FA0AAAF0FAF0F, a1, xA50FA50FA50FA50F, xF0F00F0FF0F0F0F0, 0x4D)
  LUT (x5AA5A55A5AA55AA5, a1, a4, xF0F00F0FF0F0F0F0, 0x69)
  LUT (xAA005FFFAA005FFF, a3, a5, xA50FA50FA50FA50F, 0xD6)
  LUT (x5AA5A55A0F5AFAA5, a6, x5AA5A55A5AA55AA5, xAA005FFFAA005FFF, 0x9C)
  LUT (x1, a2, xAF0FA0AAAF0FAF0F, x5AA5A55A0F5AFAA5, 0xA6)
  LUT (xAA55AA5500AA00AA, a1, a4, a6, 0x49)
  LUT (xFAFAA50FFAFAA50F, a1, a5, xA50FA50FA50FA50F, 0x9B)
  LUT (x50AF0F5AFA50A5A5, a1, xAA55AA5500AA00AA, xFAFAA50FFAFAA50F, 0x66)
  LUT (xAFAFAFAFFAFAFAFA, a1, a3, a6, 0x6F)
  LUT (xAFAFFFFFFFFAFAFF, a4, x50AF0F5AFA50A5A5, xAFAFAFAFFAFAFAFA, 0xEB)
  LUT (x4, a2, x50AF0F5AFA50A5A5, xAFAFFFFFFFFAFAFF, 0x6C)
  LUT (x500F500F500F500F, a1, a3, a4, 0x98)
  LUT (xF0505A0505A5050F, x5AA5A55A0F5AFAA5, xAA55AA5500AA00AA, xAFAFAFAFFAFAFAFA, 0x1D)
  LUT (xF0505A05AA55AAFF, a6, x500F500F500F500F, xF0505A0505A5050F, 0x9A)
  LUT (xFF005F55FF005F55, a1, a4, xAA005FFFAA005FFF, 0xB2)
  LUT (xA55F5AF0A55F5AF0, a5, xA50FA50FA50FA50F, x5AA5A55A5AA55AA5, 0x3D)
  LUT (x5A5F05A5A55F5AF0, a6, xFF005F55FF005F55, xA55F5AF0A55F5AF0, 0xA6)
  LUT (x3, a2, xF0505A05AA55AAFF, x5A5F05A5A55F5AF0, 0xA6)
  LUT (x0F0F0F0FA5A5A5A5, a1, a3, a6, 0xC6)
  LUT (x5FFFFF5FFFA0FFA0, x5AA5A55A5AA55AA5, xAFAFAFAFFAFAFAFA, x0F0F0F0FA5A5A5A5, 0xDB)
  LUT (xF5555AF500A05FFF, a5, xFAFAA50FFAFAA50F, xF0505A0505A5050F, 0xB9)
  LUT (x05A5AAF55AFA55A5, xF0505A05AA55AAFF, x0F0F0F0FA5A5A5A5, xF5555AF500A05FFF, 0x9B)
  LUT (x2, a2, x5FFFFF5FFFA0FFA0, x05A5AAF55AFA55A5, 0xA6)

  // every output is produced by a final gate whose first operand is a2
  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

// Bitslice DES S-box 4, LOP3 variant: 18 LUT gates, each one lop3.b32 (see the
// LUT macro). a1..a6 are the six input words; the four result words x1..x4 are
// XORed into *out1..*out4, so the previous contents of the outputs are kept
// and combined, not overwritten.
// NOTE(review): the temporaries appear to be named after their 64-bit truth
// table for a1=0x5555.., a2=0x3333.., a3=0x0F0F.., a4=0x00FF..,
// a5=0x0000FFFF.., a6=0x00000000FFFFFFFF (spot-checked, not exhaustively).
DECLSPEC void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  LUT (x55F055F055F055F0, a1, a3, a4, 0x72)
  LUT (xA500F5F0A500F5F0, a3, a5, x55F055F055F055F0, 0xAD)
  LUT (xF50AF50AF50AF50A, a1, a3, a4, 0x59)
  LUT (xF5FA0FFFF5FA0FFF, a3, a5, xF50AF50AF50AF50A, 0xE7)
  LUT (x61C8F93C61C8F93C, a2, xA500F5F0A500F5F0, xF5FA0FFFF5FA0FFF, 0xC6)
  LUT (x9999666699996666, a1, a2, a5, 0x69)
  LUT (x22C022C022C022C0, a2, a4, x55F055F055F055F0, 0x18)
  LUT (xB35C94A6B35C94A6, xF5FA0FFFF5FA0FFF, x9999666699996666, x22C022C022C022C0, 0x63)
  LUT (x4, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x6A)
  LUT (x4848484848484848, a1, a2, a3, 0x12)
  LUT (x55500AAA55500AAA, a1, a5, xF5FA0FFFF5FA0FFF, 0x28)
  LUT (x3C90B3D63C90B3D6, x61C8F93C61C8F93C, x4848484848484848, x55500AAA55500AAA, 0x1E)
  LUT (x8484333384843333, a1, x9999666699996666, x4848484848484848, 0x14)
  LUT (x4452F1AC4452F1AC, xF50AF50AF50AF50A, xF5FA0FFFF5FA0FFF, xB35C94A6B35C94A6, 0x78)
  LUT (x9586CA379586CA37, x55500AAA55500AAA, x8484333384843333, x4452F1AC4452F1AC, 0xD6)
  // x2 and x1 are two different lookup tables over the same three operands
  LUT (x2, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0x6A)
  LUT (x1, a6, x3C90B3D63C90B3D6, x9586CA379586CA37, 0xA9)
  // x3 reuses the operands of x4 (computed above) with a different table
  LUT (x3, a6, x61C8F93C61C8F93C, xB35C94A6B35C94A6, 0x56)

  // a6 only enters through the four output gates
  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

// Bitslice DES S-box 5, LOP3 variant: 25 LUT gates, each one lop3.b32 (see the
// LUT macro). a1..a6 are the six input words; the four result words x1..x4 are
// XORed into *out1..*out4, so the previous contents of the outputs are kept
// and combined, not overwritten.
// NOTE(review): the temporaries appear to be named after their 64-bit truth
// table for a1=0x5555.., a2=0x3333.., a3=0x0F0F.., a4=0x00FF..,
// a5=0x0000FFFF.., a6=0x00000000FFFFFFFF (spot-checked, not exhaustively).
DECLSPEC void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  LUT (xA0A0A0A0FFFFFFFF, a1, a3, a6, 0xAB)
  LUT (xFFFF00005555FFFF, a1, a5, a6, 0xB9)
  LUT (xB3B320207777FFFF, a2, xA0A0A0A0FFFFFFFF, xFFFF00005555FFFF, 0xE8)
  LUT (x50505A5A5A5A5050, a1, a3, xFFFF00005555FFFF, 0x34)
  LUT (xA2A2FFFF2222FFFF, a1, a5, xB3B320207777FFFF, 0xCE)
  LUT (x2E2E6969A4A46363, a2, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, 0x29)
  LUT (x3, a4, xB3B320207777FFFF, x2E2E6969A4A46363, 0xA6)
  LUT (xA5A50A0AA5A50A0A, a1, a3, a5, 0x49)
  LUT (x969639396969C6C6, a2, a6, xA5A50A0AA5A50A0A, 0x96)
  LUT (x1B1B1B1B1B1B1B1B, a1, a2, a3, 0xCA)
  LUT (xBFBFBFBFF6F6F9F9, a3, xA0A0A0A0FFFFFFFF, x969639396969C6C6, 0x7E)
  LUT (x5B5BA4A4B8B81D1D, xFFFF00005555FFFF, x1B1B1B1B1B1B1B1B, xBFBFBFBFF6F6F9F9, 0x96)
  LUT (x2, a4, x969639396969C6C6, x5B5BA4A4B8B81D1D, 0xCA)
  LUT (x5555BBBBFFFF5555, a1, a2, xFFFF00005555FFFF, 0xE5)
  LUT (x6D6D9C9C95956969, x50505A5A5A5A5050, xA2A2FFFF2222FFFF, x969639396969C6C6, 0x97)
  LUT (x1A1A67676A6AB4B4, xA5A50A0AA5A50A0A, x5555BBBBFFFF5555, x6D6D9C9C95956969, 0x47)
  LUT (xA0A0FFFFAAAA0000, a3, xFFFF00005555FFFF, xA5A50A0AA5A50A0A, 0x3B)
  LUT (x36369C9CC1C1D6D6, x969639396969C6C6, x6D6D9C9C95956969, xA0A0FFFFAAAA0000, 0xD9)
  LUT (x1, a4, x1A1A67676A6AB4B4, x36369C9CC1C1D6D6, 0xCA)
  LUT (x5555F0F0F5F55555, a1, a3, xFFFF00005555FFFF, 0xB1)
  LUT (x79790202DCDC0808, xA2A2FFFF2222FFFF, xA5A50A0AA5A50A0A, x969639396969C6C6, 0x47)
  LUT (x6C6CF2F229295D5D, xBFBFBFBFF6F6F9F9, x5555F0F0F5F55555, x79790202DCDC0808, 0x6E)
  LUT (xA3A3505010101A1A, a2, xA2A2FFFF2222FFFF, x36369C9CC1C1D6D6, 0x94)
  LUT (x7676C7C74F4FC7C7, a1, x2E2E6969A4A46363, xA3A3505010101A1A, 0xD9)
  LUT (x4, a4, x6C6CF2F229295D5D, x7676C7C74F4FC7C7, 0xC6)

  // a4 only enters through the four output gates
  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

// Bitslice DES S-box 6, LOP3 variant: 24 LUT gates, each one lop3.b32 (see the
// LUT macro). a1..a6 are the six input words; the four result words x1..x4 are
// XORed into *out1..*out4, so the previous contents of the outputs are kept
// and combined, not overwritten.
// NOTE(review): the temporaries appear to be named after their 64-bit truth
// table for a1=0x5555.., a2=0x3333.., a3=0x0F0F.., a4=0x00FF..,
// a5=0x0000FFFF.., a6=0x00000000FFFFFFFF (spot-checked, not exhaustively).
DECLSPEC void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  LUT (x5050F5F55050F5F5, a1, a3, a5, 0xB2)
  LUT (x6363C6C66363C6C6, a1, a2, x5050F5F55050F5F5, 0x66)
  // a1 is passed as both the first and second operand, so this gate is
  // effectively a 2-input function of a1 and a5
  LUT (xAAAA5555AAAA5555, a1, a1, a5, 0xA9)
  LUT (x3A3A65653A3A6565, a3, x6363C6C66363C6C6, xAAAA5555AAAA5555, 0xA9)
  LUT (x5963A3C65963A3C6, a4, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xC6)
  LUT (xE7E76565E7E76565, a5, x6363C6C66363C6C6, x3A3A65653A3A6565, 0xAD)
  LUT (x455D45DF455D45DF, a1, a4, xE7E76565E7E76565, 0xE4)
  LUT (x4, a6, x5963A3C65963A3C6, x455D45DF455D45DF, 0x6C)
  LUT (x1101220211012202, a2, xAAAA5555AAAA5555, x5963A3C65963A3C6, 0x20)
  LUT (xF00F0FF0F00F0FF0, a3, a4, a5, 0x69)
  LUT (x16E94A9716E94A97, xE7E76565E7E76565, x1101220211012202, xF00F0FF0F00F0FF0, 0x9E)
  LUT (x2992922929929229, a1, a2, xF00F0FF0F00F0FF0, 0x49)
  LUT (xAFAF9823AFAF9823, a5, x5050F5F55050F5F5, x2992922929929229, 0x93)
  LUT (x3, a6, x16E94A9716E94A97, xAFAF9823AFAF9823, 0x6C)
  LUT (x4801810248018102, a4, x5963A3C65963A3C6, x1101220211012202, 0xA4)
  LUT (x5EE8FFFD5EE8FFFD, a5, x16E94A9716E94A97, x4801810248018102, 0x76)
  LUT (xF0FF00FFF0FF00FF, a3, a4, a5, 0xCD)
  LUT (x942D9A67942D9A67, x3A3A65653A3A6565, x5EE8FFFD5EE8FFFD, xF0FF00FFF0FF00FF, 0x86)
  LUT (x1, a6, x5EE8FFFD5EE8FFFD, x942D9A67942D9A67, 0xA6)
  // the already finished output x4 is fed back in as an operand here
  LUT (x6A40D4ED6F4DD4EE, a2, x4, xAFAF9823AFAF9823, 0x2D)
  LUT (x6CA89C7869A49C79, x1101220211012202, x16E94A9716E94A97, x6A40D4ED6F4DD4EE, 0x26)
  LUT (xD6DE73F9D6DE73F9, a3, x6363C6C66363C6C6, x455D45DF455D45DF, 0x6B)
  LUT (x925E63E1965A63E1, x3A3A65653A3A6565, x6CA89C7869A49C79, xD6DE73F9D6DE73F9, 0xA2)
  LUT (x2, a6, x6CA89C7869A49C79, x925E63E1965A63E1, 0xCA)

  // every output is produced by a final gate whose first operand is a6
  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

// Bitslice DES S-box 7, LOP3 variant: 24 LUT gates, each one lop3.b32 (see the
// LUT macro). a1..a6 are the six input words; the four result words x1..x4 are
// XORed into *out1..*out4, so the previous contents of the outputs are kept
// and combined, not overwritten.
// NOTE(review): the temporaries appear to be named after their 64-bit truth
// table for a1=0x5555.., a2=0x3333.., a3=0x0F0F.., a4=0x00FF..,
// a5=0x0000FFFF.., a6=0x00000000FFFFFFFF (spot-checked, not exhaustively).
DECLSPEC void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  LUT (x88AA88AA88AA88AA, a1, a2, a4, 0x0B)
  LUT (xAAAAFF00AAAAFF00, a1, a4, a5, 0x27)
  LUT (xADAFF8A5ADAFF8A5, a3, x88AA88AA88AA88AA, xAAAAFF00AAAAFF00, 0x9E)
  LUT (x0A0AF5F50A0AF5F5, a1, a3, a5, 0xA6)
  LUT (x6B69C5DC6B69C5DC, a2, xADAFF8A5ADAFF8A5, x0A0AF5F50A0AF5F5, 0x6B)
  LUT (x1C69B2DC1C69B2DC, a4, x88AA88AA88AA88AA, x6B69C5DC6B69C5DC, 0xA9)
  LUT (x1, a6, xADAFF8A5ADAFF8A5, x1C69B2DC1C69B2DC, 0x6A)
  LUT (x9C9C9C9C9C9C9C9C, a1, a2, a3, 0x63)
  LUT (xE6E63BFDE6E63BFD, a2, xAAAAFF00AAAAFF00, x0A0AF5F50A0AF5F5, 0xE7)
  LUT (x6385639E6385639E, a4, x9C9C9C9C9C9C9C9C, xE6E63BFDE6E63BFD, 0x93)
  LUT (x5959C4CE5959C4CE, a2, x6B69C5DC6B69C5DC, xE6E63BFDE6E63BFD, 0x5D)
  LUT (x5B53F53B5B53F53B, a4, x0A0AF5F50A0AF5F5, x5959C4CE5959C4CE, 0x6E)
  LUT (x3, a6, x6385639E6385639E, x5B53F53B5B53F53B, 0xC6)
  LUT (xFAF505FAFAF505FA, a3, a4, x0A0AF5F50A0AF5F5, 0x6D)
  LUT (x6A65956A6A65956A, a3, x9C9C9C9C9C9C9C9C, xFAF505FAFAF505FA, 0xA6)
  LUT (x8888CCCC8888CCCC, a1, a2, a5, 0x23)
  LUT (x94E97A9494E97A94, x1C69B2DC1C69B2DC, x6A65956A6A65956A, x8888CCCC8888CCCC, 0x72)
  LUT (x4, a6, x6A65956A6A65956A, x94E97A9494E97A94, 0xAC)
  LUT (xA050A050A050A050, a1, a3, a4, 0x21)
  LUT (xC1B87A2BC1B87A2B, xAAAAFF00AAAAFF00, x5B53F53B5B53F53B, x94E97A9494E97A94, 0xA4)
  LUT (xE96016B7E96016B7, x8888CCCC8888CCCC, xA050A050A050A050, xC1B87A2BC1B87A2B, 0x96)
  LUT (xE3CF1FD5E3CF1FD5, x88AA88AA88AA88AA, x6A65956A6A65956A, xE96016B7E96016B7, 0x3E)
  LUT (x6776675B6776675B, xADAFF8A5ADAFF8A5, x94E97A9494E97A94, xE3CF1FD5E3CF1FD5, 0x6B)
  LUT (x2, a6, xE96016B7E96016B7, x6776675B6776675B, 0xC6)

  // a6 only enters through the four output gates
  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

// Bitslice DES S-box 8, LOP3 variant: 23 LUT gates, each one lop3.b32 (see the
// LUT macro). a1..a6 are the six input words; the four result words x1..x4 are
// XORed into *out1..*out4, so the previous contents of the outputs are kept
// and combined, not overwritten.
// NOTE(review): the temporaries appear to be named after their 64-bit truth
// table for a1=0x5555.., a2=0x3333.., a3=0x0F0F.., a4=0x00FF..,
// a5=0x0000FFFF.., a6=0x00000000FFFFFFFF (spot-checked, not exhaustively).
DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  LUT (xEEEE3333EEEE3333, a1, a2, a5, 0x9D)
  // a1 is passed as both the first and second operand, so this gate is
  // effectively a 2-input function of a1 and a2
  LUT (xBBBBBBBBBBBBBBBB, a1, a1, a2, 0x83)
  LUT (xDDDDAAAADDDDAAAA, a1, a2, a5, 0x5B)
  LUT (x29295A5A29295A5A, a3, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0x85)
  LUT (xC729695AC729695A, a4, xEEEE3333EEEE3333, x29295A5A29295A5A, 0xA6)
  LUT (x3BF77B7B3BF77B7B, a2, a5, xC729695AC729695A, 0xF9)
  LUT (x2900FF002900FF00, a4, a5, x29295A5A29295A5A, 0x0E)
  LUT (x56B3803F56B3803F, xBBBBBBBBBBBBBBBB, x3BF77B7B3BF77B7B, x2900FF002900FF00, 0x61)
  LUT (x4, a6, xC729695AC729695A, x56B3803F56B3803F, 0x6C)
  LUT (xFBFBFBFBFBFBFBFB, a1, a2, a3, 0xDF)
  LUT (x3012B7B73012B7B7, a2, a5, xC729695AC729695A, 0xD4)
  LUT (x34E9B34C34E9B34C, a4, xFBFBFBFBFBFBFBFB, x3012B7B73012B7B7, 0x69)
  LUT (xBFEAEBBEBFEAEBBE, a1, x29295A5A29295A5A, x34E9B34C34E9B34C, 0x6F)
  LUT (xFFAEAFFEFFAEAFFE, a3, xBBBBBBBBBBBBBBBB, xBFEAEBBEBFEAEBBE, 0xB9)
  LUT (x2, a6, x34E9B34C34E9B34C, xFFAEAFFEFFAEAFFE, 0xC6)
  LUT (xCFDE88BBCFDE88BB, a2, xDDDDAAAADDDDAAAA, x34E9B34C34E9B34C, 0x5C)
  LUT (x3055574530555745, a1, xC729695AC729695A, xCFDE88BBCFDE88BB, 0x71)
  LUT (x99DDEEEE99DDEEEE, a4, xBBBBBBBBBBBBBBBB, xDDDDAAAADDDDAAAA, 0xB9)
  LUT (x693CD926693CD926, x3BF77B7B3BF77B7B, x34E9B34C34E9B34C, x99DDEEEE99DDEEEE, 0x69)
  LUT (x3, a6, x3055574530555745, x693CD926693CD926, 0x6A)
  LUT (x9955EE559955EE55, a1, a4, x99DDEEEE99DDEEEE, 0xE2)
  LUT (x9D48FA949D48FA94, x3BF77B7B3BF77B7B, xBFEAEBBEBFEAEBBE, x9955EE559955EE55, 0x9C)
  LUT (x1, a6, xC729695AC729695A, x9D48FA949D48FA94, 0x39)

  // a6 only enters through the four output gates
  *out1 ^= x1;
  *out2 ^= x2;
  *out3 ^= x3;
  *out4 ^= x4;
}

#else

/*
 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
 * architectures.  These use AND, OR, XOR, NOT, and AND-NOT gates.
 *
 * Gate counts: 49 44 46 33 48 46 46 41
 * Average: 44.125
 *
 * Several same-gate-count expressions for each S-box are included (for use on
 * different CPUs/GPUs).
 *
 * These Boolean expressions corresponding to DES S-boxes have been generated
 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
 * John the Ripper password cracker: http://www.openwall.com/john/
 * Being mathematical formulas, they are not copyrighted and are free for reuse
 * by anyone.
 *
 * This file (a specific representation of the S-box expressions, surrounding
 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.  (This is a heavily cut-down "BSD license".)
 *
 * The effort has been sponsored by Rapid7: http://www.rapid7.com
 */

/*
 * Bitslice DES S-box 1 built from AND / OR / XOR / NOT / AND-NOT gates.
 *
 * a1..a6 are the six input words. The four result words are XORed into
 * *out1..*out4 (existing contents are combined, not overwritten), in the
 * order out3, out1, out2, out4.
 *
 * Each temporary is named after the value it takes for a1 = 0x55555555,
 * a3 = 0x33333333, a4 = 0x0F0F0F0F, a5 = 0x00FF00FF, a6 = 0x0000FFFF.
 * a2 is only used in the final gate pair of each output.
 */
DECLSPEC void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
    const u32 x55005500 = a1 & ~a5;
    const u32 x5A0F5A0F = a4 ^ x55005500;
    const u32 x3333FFFF = a3 | a6;
    const u32 x66666666 = a1 ^ a3;
    const u32 x22226666 = x3333FFFF & x66666666;
    const u32 x2D2D6969 = a4 ^ x22226666;
    const u32 x25202160 = x2D2D6969 & ~x5A0F5A0F;

    const u32 x00FFFF00 = a5 ^ a6;
    const u32 x33CCCC33 = a3 ^ x00FFFF00;
    const u32 x4803120C = x5A0F5A0F & ~x33CCCC33;
    const u32 x2222FFFF = a6 | x22226666;
    const u32 x6A21EDF3 = x4803120C ^ x2222FFFF;
    const u32 x4A01CC93 = x6A21EDF3 & ~x25202160;

    const u32 x5555FFFF = a1 | a6;
    const u32 x7F75FFFF = x6A21EDF3 | x5555FFFF;
    const u32 x00D20096 = a5 & ~x2D2D6969;
    const u32 x7FA7FF69 = x7F75FFFF ^ x00D20096;

    const u32 x0A0A0000 = a4 & ~x5555FFFF;
    const u32 x0AD80096 = x00D20096 ^ x0A0A0000;
    const u32 x00999900 = x00FFFF00 & ~x66666666;
    const u32 x0AD99996 = x0AD80096 | x00999900;

    const u32 x22332233 = a3 & ~x55005500;
    const u32 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
    const u32 x054885C0 = x257AA5F0 & ~x22332233;
    const u32 xFAB77A3F = ~x054885C0;
    const u32 x2221EDF3 = x3333FFFF & x6A21EDF3;
    const u32 xD89697CC = xFAB77A3F ^ x2221EDF3;

    /* output 3 */
    *out3 ^= (x7FA7FF69 & ~a2) ^ xD89697CC;

    const u32 x05B77AC0 = x00FFFF00 ^ x054885C0;
    const u32 x05F77AD6 = x00D20096 | x05B77AC0;
    const u32 x36C48529 = x3333FFFF ^ x05F77AD6;
    const u32 x6391D07C = a1 ^ x36C48529;
    const u32 xBB0747B0 = xD89697CC ^ x6391D07C;

    /* output 1 */
    *out1 ^= (x25202160 | a2) ^ xBB0747B0;

    const u32 x4C460000 = x3333FFFF ^ x7F75FFFF;
    const u32 x4EDF9996 = x0AD99996 | x4C460000;
    const u32 x2D4E49EA = x6391D07C ^ x4EDF9996;
    const u32 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
    const u32 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;

    /* output 2 */
    *out2 ^= (x4A01CC93 | a2) ^ x96B1B65A;

    const u32 x5AFF5AFF = a5 | x5A0F5A0F;
    const u32 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
    const u32 x4201C010 = x4A01CC93 & x6391D07C;
    const u32 x10B0D205 = x52B11215 ^ x4201C010;

    /* output 4 */
    *out4 ^= (x10B0D205 | a2) ^ x0AD99996;
}

/*
 * Bitslice DES S-box 2 built from AND / OR / XOR / NOT / AND-NOT gates.
 *
 * a1..a6 are the six input words. The four result words are XORed into
 * *out1..*out4 (existing contents are combined, not overwritten), in the
 * order out2, out1, out3, out4.
 *
 * Each temporary is named after the value it takes for a1 = 0x55555555,
 * a2 = 0x33333333, a3 = 0x0F0F0F0F, a5 = 0x00FF00FF, a6 = 0x0000FFFF.
 * a4 is only used in the final gate pair of each output.
 */
DECLSPEC void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
    const u32 x33CC33CC = a2 ^ a5;

    const u32 x55550000 = a1 & ~a6;
    const u32 x00AA00FF = a5 & ~x55550000;
    const u32 x33BB33FF = a2 | x00AA00FF;

    const u32 x33CC0000 = x33CC33CC & ~a6;
    const u32 x11441144 = a1 & x33CC33CC;
    const u32 x11BB11BB = a5 ^ x11441144;
    const u32 x003311BB = x11BB11BB & ~x33CC0000;

    const u32 x00000F0F = a3 & a6;
    const u32 x336600FF = x00AA00FF ^ x33CC0000;
    const u32 x332200FF = x33BB33FF & x336600FF;
    const u32 x332200F0 = x332200FF & ~x00000F0F;

    const u32 x0302000F = a3 & x332200FF;
    const u32 xAAAAAAAA = ~a1;
    const u32 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
    const u32 x33CCCC33 = a6 ^ x33CC33CC;
    const u32 x33CCC030 = x33CCCC33 & ~x00000F0F;
    const u32 x9A646A95 = xA9A8AAA5 ^ x33CCC030;

    /* output 2 */
    *out2 ^= (a4 & ~x332200F0) ^ x9A646A95;

    const u32 x00333303 = a2 & ~x33CCC030;
    const u32 x118822B8 = x11BB11BB ^ x00333303;
    const u32 xA8208805 = xA9A8AAA5 & ~x118822B8;
    const u32 x3CC3C33C = a3 ^ x33CCCC33;
    const u32 x94E34B39 = xA8208805 ^ x3CC3C33C;

    /* output 1 */
    *out1 ^= (x33BB33FF & ~a4) ^ x94E34B39;

    const u32 x0331330C = x0302000F ^ x00333303;
    const u32 x3FF3F33C = x3CC3C33C | x0331330C;
    const u32 xA9DF596A = x33BB33FF ^ x9A646A95;
    const u32 xA9DF5F6F = x00000F0F | xA9DF596A;
    const u32 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;

    const u32 xA9466A6A = x332200FF ^ x9A646A95;
    const u32 x3DA52153 = x94E34B39 ^ xA9466A6A;
    const u32 x29850143 = xA9DF5F6F & x3DA52153;
    const u32 x33C0330C = x33CC33CC & x3FF3F33C;
    const u32 x1A45324F = x29850143 ^ x33C0330C;

    /* output 3 */
    *out3 ^= (x1A45324F | a4) ^ x962CAC53;

    const u32 x0A451047 = x1A45324F & ~x118822B8;
    const u32 xBBDFDD7B = x33CCCC33 | xA9DF596A;
    const u32 xB19ACD3C = x0A451047 ^ xBBDFDD7B;

    /* output 4 */
    *out4 ^= (x003311BB | a4) ^ xB19ACD3C;
}

/*
 * Bitslice DES S-box 3 built from AND / OR / XOR / NOT / AND-NOT gates.
 *
 * a1..a6 are the six input words. The four result words are XORed into
 * *out1..*out4 (existing contents are combined, not overwritten), in the
 * order out4, out2, out1, out3.
 *
 * Each temporary is named after the value it takes for a1 = 0x55555555,
 * a2 = 0x33333333, a3 = 0x0F0F0F0F, a4 = 0x00FF00FF, a6 = 0x0000FFFF.
 * a5 is only used in the final gate pair of each output.
 */
DECLSPEC void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
    const u32 x44444444 = a1 & ~a2;
    const u32 x0F0FF0F0 = a3 ^ a6;
    const u32 x4F4FF4F4 = x44444444 | x0F0FF0F0;
    const u32 x00FFFF00 = a4 ^ a6;
    const u32 x00AAAA00 = x00FFFF00 & ~a1;
    const u32 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;

    const u32 x3C3CC3C3 = a2 ^ x0F0FF0F0;
    const u32 x3C3C0000 = x3C3CC3C3 & ~a6;
    const u32 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
    const u32 x0C840A00 = x4FE55EF4 & ~x7373F4F4;

    const u32 x00005EF4 = a6 & x4FE55EF4;
    const u32 x00FF5EFF = a4 | x00005EF4;
    const u32 x00555455 = a1 & x00FF5EFF;
    const u32 x3C699796 = x3C3CC3C3 ^ x00555455;

    /* output 4 */
    *out4 ^= (x4FE55EF4 & ~a5) ^ x3C699796;

    const u32 x000FF000 = x0F0FF0F0 & x00FFFF00;
    const u32 x55AA55AA = a1 ^ a4;
    const u32 x26D9A15E = x7373F4F4 ^ x55AA55AA;
    const u32 x2FDFAF5F = a3 | x26D9A15E;
    const u32 x2FD00F5F = x2FDFAF5F & ~x000FF000;

    const u32 x55AAFFAA = x00AAAA00 | x55AA55AA;
    const u32 x28410014 = x3C699796 & ~x55AAFFAA;
    const u32 x000000FF = a4 & a6;
    const u32 x000000CC = x000000FF & ~a2;
    const u32 x284100D8 = x28410014 ^ x000000CC;

    const u32 x204100D0 = x7373F4F4 & x284100D8;
    const u32 x3C3CC3FF = x3C3CC3C3 | x000000FF;
    const u32 x1C3CC32F = x3C3CC3FF & ~x204100D0;
    const u32 x4969967A = a1 ^ x1C3CC32F;

    /* output 2 */
    *out2 ^= (x2FD00F5F & a5) ^ x4969967A;

    const u32 x4CC44CC4 = x4FE55EF4 & ~a2;
    const u32 x40C040C0 = x4CC44CC4 & ~a3;
    const u32 xC3C33C3C = ~x3C3CC3C3;
    const u32 x9669C396 = x55AAFFAA ^ xC3C33C3C;
    const u32 xD6A98356 = x40C040C0 ^ x9669C396;

    /* output 1 */
    *out1 ^= (a5 & ~x0C840A00) ^ xD6A98356;

    const u32 xD6E9C3D6 = x40C040C0 | x9669C396;
    const u32 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
    const u32 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
    const u32 x001A000B = a4 & ~x4FE55EF4;
    const u32 x9A1F2D1B = x9A072D12 | x001A000B;

    /* output 3 */
    *out3 ^= (a5 & ~x284100D8) ^ x9A1F2D1B;
}

/*
 * Bitslice DES S-box 4 built from AND / OR / XOR / NOT / AND-NOT gates.
 *
 * a1..a6 are the six input words. The four result words are XORed into
 * *out1..*out4 (existing contents are combined, not overwritten), in the
 * order out1, out2, out3, out4.
 *
 * Each temporary is named after the value it takes for a1 = 0x55555555,
 * a2 = 0x33333333, a3 = 0x0F0F0F0F, a4 = 0x00FF00FF, a5 = 0x0000FFFF.
 * a6 is only used in the final gate pair of each output.
 */
DECLSPEC void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
    const u32 x5A5A5A5A = a1 ^ a3;
    const u32 x0F0FF0F0 = a3 ^ a5;
    const u32 x33FF33FF = a2 | a4;
    const u32 x33FFCC00 = a5 ^ x33FF33FF;
    const u32 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
    const u32 x0C0CC0C0 = x0F0FF0F0 & ~a2;
    const u32 x0CF3C03F = a4 ^ x0C0CC0C0;
    const u32 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
    const u32 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
    const u32 x61C8F93C = a2 ^ x52FBCA0F;

    const u32 x00C0C03C = x0CF3C03F & x61C8F93C;
    const u32 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
    const u32 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
    const u32 x30908326 = x3B92A366 & ~x0F0F30C0;
    const u32 x3C90B3D6 = x0C0030F0 ^ x30908326;

    const u32 x33CC33CC = a2 ^ a4;
    const u32 x0C0CFFFF = a5 | x0C0CC0C0;
    const u32 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
    const u32 x04124C11 = x379E5C99 & ~x33CC33CC;
    const u32 x56E9861E = x52FBCA0F ^ x04124C11;

    /* output 1 */
    *out1 ^= (a6 & ~x3C90B3D6) ^ x56E9861E;

    const u32 xA91679E1 = ~x56E9861E;

    /* output 2 */
    *out2 ^= (x3C90B3D6 & ~a6) ^ xA91679E1;

    const u32 x9586CA37 = x3C90B3D6 ^ xA91679E1;
    const u32 x8402C833 = x9586CA37 & ~x33CC33CC;
    const u32 x84C2C83F = x00C0C03C | x8402C833;
    const u32 xB35C94A6 = x379E5C99 ^ x84C2C83F;

    /* output 3 */
    *out3 ^= (x61C8F93C | a6) ^ xB35C94A6;

    /* output 4 shares both operands with output 3, using AND instead of OR */
    *out4 ^= (a6 & x61C8F93C) ^ xB35C94A6;
}

/*
 * Bitslice DES S-box 5 built from AND / OR / XOR / NOT / AND-NOT gates.
 *
 * a1..a6 are the six input words. The four result words are XORed into
 * *out1..*out4 (existing contents are combined, not overwritten), in the
 * order out3, out4, out1, out2.
 *
 * Each temporary is named after the value it takes for a1 = 0x55555555,
 * a3 = 0x33333333, a4 = 0x0F0F0F0F, a5 = 0x00FF00FF, a6 = 0x0000FFFF.
 * a2 is only used in the final gate pair of each output.
 */
DECLSPEC void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
    const u32 x77777777 = a1 | a3;
    const u32 x77770000 = x77777777 & ~a6;
    const u32 x22225555 = a1 ^ x77770000;
    const u32 x11116666 = a3 ^ x22225555;
    const u32 x1F1F6F6F = a4 | x11116666;

    const u32 x70700000 = x77770000 & ~a4;
    const u32 x43433333 = a3 ^ x70700000;
    const u32 x00430033 = a5 & x43433333;
    const u32 x55557777 = a1 | x11116666;
    const u32 x55167744 = x00430033 ^ x55557777;
    const u32 x5A19784B = a4 ^ x55167744;

    const u32 x5A1987B4 = a6 ^ x5A19784B;
    const u32 x7A3BD7F5 = x22225555 | x5A1987B4;
    const u32 x003B00F5 = a5 & x7A3BD7F5;
    const u32 x221955A0 = x22225555 ^ x003B00F5;
    const u32 x05050707 = a4 & x55557777;
    const u32 x271C52A7 = x221955A0 ^ x05050707;

    const u32 x2A2A82A0 = x7A3BD7F5 & ~a1;
    const u32 x6969B193 = x43433333 ^ x2A2A82A0;
    const u32 x1FE06F90 = a5 ^ x1F1F6F6F;
    const u32 x16804E00 = x1FE06F90 & ~x6969B193;
    const u32 xE97FB1FF = ~x16804E00;

    /* output 3 */
    *out3 ^= (xE97FB1FF & ~a2) ^ x5A19784B;

    const u32 x43403302 = x43433333 & ~x003B00F5;
    const u32 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
    const u32 x37DEFFB7 = x271C52A7 | x35CAED30;
    const u32 x349ECCB5 = x37DEFFB7 & ~x43403302;
    const u32 x0B01234A = x1F1F6F6F & ~x349ECCB5;

    const u32 x101884B4 = x5A1987B4 & x349ECCB5;
    const u32 x0FF8EB24 = x1FE06F90 ^ x101884B4;
    const u32 x41413333 = x43433333 & x55557777;
    const u32 x4FF9FB37 = x0FF8EB24 | x41413333;
    const u32 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;

    /* output 4 */
    *out4 ^= (x4FC2FBC2 & a2) ^ x271C52A7;

    const u32 x22222222 = a1 ^ x77777777;
    const u32 x16BCEE97 = x349ECCB5 ^ x22222222;
    const u32 x0F080B04 = a4 & x0FF8EB24;
    const u32 x19B4E593 = x16BCEE97 ^ x0F080B04;

    /* output 1 */
    *out1 ^= (x0B01234A | a2) ^ x19B4E593;

    const u32 x5C5C5C5C = x1F1F6F6F ^ x43433333;
    const u32 x4448184C = x5C5C5C5C & ~x19B4E593;
    const u32 x2DDABE71 = x22225555 ^ x0FF8EB24;
    const u32 x6992A63D = x4448184C ^ x2DDABE71;

    /* output 2 */
    *out2 ^= (x1F1F6F6F & a2) ^ x6992A63D;
}

/*
 * Bitslice DES S-box 6 built from AND / OR / XOR / NOT / AND-NOT gates.
 *
 * a1..a6 are the six input words. The four result words are XORed into
 * *out1..*out4 (existing contents are combined, not overwritten), in the
 * order out4, out3, out2, out1.
 *
 * Each temporary is named after the value it takes for a1 = 0x55555555,
 * a2 = 0x33333333, a3 = 0x0F0F0F0F, a5 = 0x00FF00FF, a6 = 0x0000FFFF.
 * a4 is only used in the final gate pair of each output.
 */
DECLSPEC void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
    const u32 x33CC33CC = a2 ^ a5;

    const u32 x3333FFFF = a2 | a6;
    const u32 x11115555 = a1 & x3333FFFF;
    const u32 x22DD6699 = x33CC33CC ^ x11115555;
    const u32 x22DD9966 = a6 ^ x22DD6699;
    const u32 x00220099 = a5 & ~x22DD9966;

    const u32 x00551144 = a1 & x22DD9966;
    const u32 x33662277 = a2 ^ x00551144;
    const u32 x5A5A5A5A = a1 ^ a3;
    const u32 x7B7E7A7F = x33662277 | x5A5A5A5A;
    const u32 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;

    const u32 x09030C06 = a3 & x59A31CE6;
    const u32 x09030000 = x09030C06 & ~a6;
    const u32 x336622FF = x00220099 | x33662277;
    const u32 x3A6522FF = x09030000 ^ x336622FF;

    /* output 4 */
    *out4 ^= (x3A6522FF & a4) ^ x59A31CE6;

    const u32 x484D494C = a2 ^ x7B7E7A7F;
    const u32 x0000B6B3 = a6 & ~x484D494C;
    const u32 x0F0FB9BC = a3 ^ x0000B6B3;
    const u32 x00FC00F9 = a5 & ~x09030C06;
    const u32 x0FFFB9FD = x0F0FB9BC | x00FC00F9;

    const u32 x5DF75DF7 = a1 | x59A31CE6;
    const u32 x116600F7 = x336622FF & x5DF75DF7;
    const u32 x1E69B94B = x0F0FB9BC ^ x116600F7;
    const u32 x1668B94B = x1E69B94B & ~x09030000;

    /* output 3 */
    *out3 ^= (x00220099 | a4) ^ x1668B94B;

    const u32 x7B7B7B7B = a2 | x5A5A5A5A;
    const u32 x411E5984 = x3A6522FF ^ x7B7B7B7B;
    const u32 x1FFFFDFD = x11115555 | x0FFFB9FD;
    const u32 x5EE1A479 = x411E5984 ^ x1FFFFDFD;

    const u32 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
    const u32 x004B002D = a5 & ~x3CB4DFD2;
    const u32 xB7B2B6B3 = ~x484D494C;
    const u32 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
    const u32 xCC82CDE5 = x004B002D ^ xCCC9CDC8;

    /* output 2 */
    *out2 ^= (xCC82CDE5 & ~a4) ^ x5EE1A479;

    const u32 x0055EEBB = a6 ^ x00551144;
    const u32 x5A5AECE9 = a1 ^ x0F0FB9BC;
    const u32 x0050ECA9 = x0055EEBB & x5A5AECE9;
    const u32 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
    const u32 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;

    /* output 1 */
    *out1 ^= (x0FFFB9FD & ~a4) ^ xC59A2D67;
}

/*
 * Bitslice DES S-box 7 built from AND / OR / XOR / NOT / AND-NOT gates.
 *
 * a1..a6 are the six input words. The four result words are XORed into
 * *out1..*out4 (existing contents are combined, not overwritten), in the
 * order out4, out1, out3, out2.
 *
 * Each temporary is named after the value it takes for a2 = 0x55555555,
 * a3 = 0x33333333, a4 = 0x0F0F0F0F, a5 = 0x00FF00FF, a6 = 0x0000FFFF.
 * a1 is only used in the final gate pair of each output.
 */
DECLSPEC void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
    const u32 x0FF00FF0 = a4 ^ a5;
    const u32 x3CC33CC3 = a3 ^ x0FF00FF0;
    const u32 x00003CC3 = a6 & x3CC33CC3;
    const u32 x0F000F00 = a4 & x0FF00FF0;
    const u32 x5A555A55 = a2 ^ x0F000F00;
    const u32 x00001841 = x00003CC3 & x5A555A55;

    const u32 x00000F00 = a6 & x0F000F00;
    const u32 x33333C33 = a3 ^ x00000F00;
    const u32 x7B777E77 = x5A555A55 | x33333C33;
    const u32 x0FF0F00F = a6 ^ x0FF00FF0;
    const u32 x74878E78 = x7B777E77 ^ x0FF0F00F;

    /* output 4 */
    *out4 ^= (a1 & ~x00001841) ^ x74878E78;

    const u32 x003C003C = a5 & ~x3CC33CC3;
    const u32 x5A7D5A7D = x5A555A55 | x003C003C;
    const u32 x333300F0 = x00003CC3 ^ x33333C33;
    const u32 x694E5A8D = x5A7D5A7D ^ x333300F0;

    const u32 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
    const u32 x000F0303 = a4 & ~x0FF0CCCC;
    const u32 x5A505854 = x5A555A55 & ~x000F0303;
    const u32 x33CC000F = a5 ^ x333300F0;
    const u32 x699C585B = x5A505854 ^ x33CC000F;

    const u32 x7F878F78 = x0F000F00 | x74878E78;
    const u32 x21101013 = a3 & x699C585B;
    const u32 x7F979F7B = x7F878F78 | x21101013;
    const u32 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
    const u32 x4F9493BB = x7F979F7B ^ x30030CC0;

    /* output 1 */
    *out1 ^= (x4F9493BB & ~a1) ^ x694E5A8D;

    const u32 x6F9CDBFB = x699C585B | x4F9493BB;
    const u32 x0000DBFB = a6 & x6F9CDBFB;
    const u32 x00005151 = a2 & x0000DBFB;
    const u32 x26DAC936 = x694E5A8D ^ x4F9493BB;
    const u32 x26DA9867 = x00005151 ^ x26DAC936;

    const u32 x27DA9877 = x21101013 | x26DA9867;
    const u32 x27DA438C = x0000DBFB ^ x27DA9877;
    const u32 x2625C9C9 = a5 ^ x26DAC936;
    const u32 x27FFCBCD = x27DA438C | x2625C9C9;

    /* output 3 */
    *out3 ^= (x27FFCBCD & a1) ^ x699C585B;

    const u32 x27FF1036 = x0000DBFB ^ x27FFCBCD;
    const u32 x27FF103E = x003C003C | x27FF1036;
    const u32 xB06B6C44 = ~x4F9493BB;
    const u32 x97947C7A = x27FF103E ^ xB06B6C44;

    /* output 2 */
    *out2 ^= (x97947C7A & ~a1) ^ x26DA9867;
}

/*
 * Bitslice DES S-box 8 built from AND / OR / XOR / NOT / AND-NOT gates.
 *
 * a1..a6 are the six input words. The four result words are XORed into
 * *out1..*out4 (existing contents are combined, not overwritten), in the
 * order out2, out3, out4, out1.
 *
 * Each temporary is named after the value it takes for a1 = 0x55555555,
 * a2 = 0x33333333, a3 = 0x0F0F0F0F, a4 = 0x00FF00FF, a5 = 0x0000FFFF.
 * a6 is only used in the final gate pair of each output.
 */
DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
    const u32 x0C0C0C0C = a3 & ~a2;
    const u32 x0000F0F0 = a5 & ~a3;
    const u32 x00FFF00F = a4 ^ x0000F0F0;
    const u32 x00555005 = a1 & x00FFF00F;
    const u32 x00515001 = x00555005 & ~x0C0C0C0C;

    const u32 x33000330 = a2 & ~x00FFF00F;
    const u32 x77555775 = a1 | x33000330;
    const u32 x30303030 = a2 & ~a3;
    const u32 x3030CFCF = a5 ^ x30303030;
    const u32 x30104745 = x77555775 & x3030CFCF;
    const u32 x30555745 = x00555005 | x30104745;

    const u32 xFF000FF0 = ~x00FFF00F;
    const u32 xCF1048B5 = x30104745 ^ xFF000FF0;
    const u32 x080A080A = a3 & ~x77555775;
    const u32 xC71A40BF = xCF1048B5 ^ x080A080A;
    const u32 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;

    /* output 2 */
    *out2 ^= (x00515001 | a6) ^ xCB164CB3;

    const u32 x9E4319E6 = a1 ^ xCB164CB3;
    const u32 x000019E6 = a5 & x9E4319E6;
    const u32 xF429738C = a2 ^ xC71A40BF;
    const u32 xF4296A6A = x000019E6 ^ xF429738C;
    const u32 xC729695A = x33000330 ^ xF4296A6A;

    const u32 xC47C3D2F = x30555745 ^ xF4296A6A;
    const u32 xF77F3F3F = a2 | xC47C3D2F;
    const u32 x9E43E619 = a5 ^ x9E4319E6;
    const u32 x693CD926 = xF77F3F3F ^ x9E43E619;

    /* output 3 */
    *out3 ^= (x30555745 & a6) ^ x693CD926;

    const u32 xF719A695 = x3030CFCF ^ xC729695A;
    const u32 xF4FF73FF = a4 | xF429738C;
    const u32 x03E6D56A = xF719A695 ^ xF4FF73FF;
    const u32 x56B3803F = a1 ^ x03E6D56A;

    /* output 4 */
    *out4 ^= (x56B3803F & a6) ^ xC729695A;

    const u32 xF700A600 = xF719A695 & ~a4;
    const u32 x61008000 = x693CD926 & xF700A600;
    const u32 x03B7856B = x00515001 ^ x03E6D56A;
    const u32 x62B7056B = x61008000 ^ x03B7856B;

    /* output 1 */
    *out1 ^= (x62B7056B | a6) ^ xC729695A;
}

#endif
#endif

#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC

/*
 * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
 * architectures.  These use AND, OR, XOR, NOT, and AND-NOT gates.
 *
 * Gate counts: 49 44 46 33 48 46 46 41
 * Average: 44.125
 *
 * Several same-gate-count expressions for each S-box are included (for use on
 * different CPUs/GPUs).
 *
 * These Boolean expressions corresponding to DES S-boxes have been generated
 * by Roman Rusakov <roman_rus at openwall.com> for use in Openwall's
 * John the Ripper password cracker: http://www.openwall.com/john/
 * Being mathematical formulas, they are not copyrighted and are free for reuse
 * by anyone.
 *
 * This file (a specific representation of the S-box expressions, surrounding
 * logic) is Copyright (c) 2011 by Solar Designer <solar at openwall.com>.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.  (This is a heavily cut-down "BSD license".)
 *
 * The effort has been sponsored by Rapid7: http://www.rapid7.com
 */

// Bitslice DES S-box 1, expressed as a network of AND / OR / XOR / NOT gates.
// a1..a6 are the six input words. Every operation is bitwise, so each bit
// position of the u32 words is evaluated independently of the others.
// The four result words are XORed into *out1..*out4 (accumulated, not stored).
// NOTE(review): the xHHHHHHHH names appear to encode the truth table of the
// value they hold - keep names and gate order in sync with the upstream formulas.
DECLSPEC void s1 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  const u32 x55005500 = a1 & ~a5;
  const u32 x5A0F5A0F = a4 ^ x55005500;
  const u32 x3333FFFF = a3 | a6;
  const u32 x66666666 = a1 ^ a3;
  const u32 x22226666 = x3333FFFF & x66666666;
  const u32 x2D2D6969 = a4 ^ x22226666;
  const u32 x25202160 = x2D2D6969 & ~x5A0F5A0F;

  const u32 x00FFFF00 = a5 ^ a6;
  const u32 x33CCCC33 = a3 ^ x00FFFF00;
  const u32 x4803120C = x5A0F5A0F & ~x33CCCC33;
  const u32 x2222FFFF = a6 | x22226666;
  const u32 x6A21EDF3 = x4803120C ^ x2222FFFF;
  const u32 x4A01CC93 = x6A21EDF3 & ~x25202160;

  const u32 x5555FFFF = a1 | a6;
  const u32 x7F75FFFF = x6A21EDF3 | x5555FFFF;
  const u32 x00D20096 = a5 & ~x2D2D6969;
  const u32 x7FA7FF69 = x7F75FFFF ^ x00D20096;

  const u32 x0A0A0000 = a4 & ~x5555FFFF;
  const u32 x0AD80096 = x00D20096 ^ x0A0A0000;
  const u32 x00999900 = x00FFFF00 & ~x66666666;
  const u32 x0AD99996 = x0AD80096 | x00999900;

  const u32 x22332233 = a3 & ~x55005500;
  const u32 x257AA5F0 = x5A0F5A0F ^ x7F75FFFF;
  const u32 x054885C0 = x257AA5F0 & ~x22332233;
  const u32 xFAB77A3F = ~x054885C0;
  const u32 x2221EDF3 = x3333FFFF & x6A21EDF3;
  const u32 xD89697CC = xFAB77A3F ^ x2221EDF3;

  // output 3
  *out3 ^= (x7FA7FF69 & ~a2) ^ xD89697CC;

  const u32 x05B77AC0 = x00FFFF00 ^ x054885C0;
  const u32 x05F77AD6 = x00D20096 | x05B77AC0;
  const u32 x36C48529 = x3333FFFF ^ x05F77AD6;
  const u32 x6391D07C = a1 ^ x36C48529;
  const u32 xBB0747B0 = xD89697CC ^ x6391D07C;

  // output 1
  *out1 ^= (x25202160 | a2) ^ xBB0747B0;

  const u32 x4C460000 = x3333FFFF ^ x7F75FFFF;
  const u32 x4EDF9996 = x0AD99996 | x4C460000;
  const u32 x2D4E49EA = x6391D07C ^ x4EDF9996;
  const u32 xBBFFFFB0 = x00FFFF00 | xBB0747B0;
  const u32 x96B1B65A = x2D4E49EA ^ xBBFFFFB0;

  // output 2
  *out2 ^= (x4A01CC93 | a2) ^ x96B1B65A;

  const u32 x5AFF5AFF = a5 | x5A0F5A0F;
  const u32 x52B11215 = x5AFF5AFF & ~x2D4E49EA;
  const u32 x4201C010 = x4A01CC93 & x6391D07C;
  const u32 x10B0D205 = x52B11215 ^ x4201C010;

  // output 4
  *out4 ^= (x10B0D205 | a2) ^ x0AD99996;
}

// Bitslice DES S-box 2, expressed as a network of AND / OR / XOR / NOT gates.
// a1..a6 are the six input words. Every operation is bitwise, so each bit
// position of the u32 words is evaluated independently of the others.
// The four result words are XORed into *out1..*out4 (accumulated, not stored).
// NOTE(review): the xHHHHHHHH names appear to encode the truth table of the
// value they hold - keep names and gate order in sync with the upstream formulas.
DECLSPEC void s2 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  const u32 x33CC33CC = a2 ^ a5;

  const u32 x55550000 = a1 & ~a6;
  const u32 x00AA00FF = a5 & ~x55550000;
  const u32 x33BB33FF = a2 | x00AA00FF;

  const u32 x33CC0000 = x33CC33CC & ~a6;
  const u32 x11441144 = a1 & x33CC33CC;
  const u32 x11BB11BB = a5 ^ x11441144;
  const u32 x003311BB = x11BB11BB & ~x33CC0000;

  const u32 x00000F0F = a3 & a6;
  const u32 x336600FF = x00AA00FF ^ x33CC0000;
  const u32 x332200FF = x33BB33FF & x336600FF;
  const u32 x332200F0 = x332200FF & ~x00000F0F;

  const u32 x0302000F = a3 & x332200FF;
  const u32 xAAAAAAAA = ~a1;
  const u32 xA9A8AAA5 = x0302000F ^ xAAAAAAAA;
  const u32 x33CCCC33 = a6 ^ x33CC33CC;
  const u32 x33CCC030 = x33CCCC33 & ~x00000F0F;
  const u32 x9A646A95 = xA9A8AAA5 ^ x33CCC030;

  // output 2
  *out2 ^= (a4 & ~x332200F0) ^ x9A646A95;

  const u32 x00333303 = a2 & ~x33CCC030;
  const u32 x118822B8 = x11BB11BB ^ x00333303;
  const u32 xA8208805 = xA9A8AAA5 & ~x118822B8;
  const u32 x3CC3C33C = a3 ^ x33CCCC33;
  const u32 x94E34B39 = xA8208805 ^ x3CC3C33C;

  // output 1
  *out1 ^= (x33BB33FF & ~a4) ^ x94E34B39;

  const u32 x0331330C = x0302000F ^ x00333303;
  const u32 x3FF3F33C = x3CC3C33C | x0331330C;
  const u32 xA9DF596A = x33BB33FF ^ x9A646A95;
  const u32 xA9DF5F6F = x00000F0F | xA9DF596A;
  const u32 x962CAC53 = x3FF3F33C ^ xA9DF5F6F;

  const u32 xA9466A6A = x332200FF ^ x9A646A95;
  const u32 x3DA52153 = x94E34B39 ^ xA9466A6A;
  const u32 x29850143 = xA9DF5F6F & x3DA52153;
  const u32 x33C0330C = x33CC33CC & x3FF3F33C;
  const u32 x1A45324F = x29850143 ^ x33C0330C;

  // output 3
  *out3 ^= (x1A45324F | a4) ^ x962CAC53;

  const u32 x0A451047 = x1A45324F & ~x118822B8;
  const u32 xBBDFDD7B = x33CCCC33 | xA9DF596A;
  const u32 xB19ACD3C = x0A451047 ^ xBBDFDD7B;

  // output 4
  *out4 ^= (x003311BB | a4) ^ xB19ACD3C;
}

// Bitslice DES S-box 3, expressed as a network of AND / OR / XOR / NOT gates.
// a1..a6 are the six input words. Every operation is bitwise, so each bit
// position of the u32 words is evaluated independently of the others.
// The four result words are XORed into *out1..*out4 (accumulated, not stored).
// NOTE(review): the xHHHHHHHH names appear to encode the truth table of the
// value they hold - keep names and gate order in sync with the upstream formulas.
DECLSPEC void s3 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  const u32 x44444444 = a1 & ~a2;
  const u32 x0F0FF0F0 = a3 ^ a6;
  const u32 x4F4FF4F4 = x44444444 | x0F0FF0F0;
  const u32 x00FFFF00 = a4 ^ a6;
  const u32 x00AAAA00 = x00FFFF00 & ~a1;
  const u32 x4FE55EF4 = x4F4FF4F4 ^ x00AAAA00;

  const u32 x3C3CC3C3 = a2 ^ x0F0FF0F0;
  const u32 x3C3C0000 = x3C3CC3C3 & ~a6;
  const u32 x7373F4F4 = x4F4FF4F4 ^ x3C3C0000;
  const u32 x0C840A00 = x4FE55EF4 & ~x7373F4F4;

  const u32 x00005EF4 = a6 & x4FE55EF4;
  const u32 x00FF5EFF = a4 | x00005EF4;
  const u32 x00555455 = a1 & x00FF5EFF;
  const u32 x3C699796 = x3C3CC3C3 ^ x00555455;

  // output 4
  *out4 ^= (x4FE55EF4 & ~a5) ^ x3C699796;

  const u32 x000FF000 = x0F0FF0F0 & x00FFFF00;
  const u32 x55AA55AA = a1 ^ a4;
  const u32 x26D9A15E = x7373F4F4 ^ x55AA55AA;
  const u32 x2FDFAF5F = a3 | x26D9A15E;
  const u32 x2FD00F5F = x2FDFAF5F & ~x000FF000;

  const u32 x55AAFFAA = x00AAAA00 | x55AA55AA;
  const u32 x28410014 = x3C699796 & ~x55AAFFAA;
  const u32 x000000FF = a4 & a6;
  const u32 x000000CC = x000000FF & ~a2;
  const u32 x284100D8 = x28410014 ^ x000000CC;

  const u32 x204100D0 = x7373F4F4 & x284100D8;
  const u32 x3C3CC3FF = x3C3CC3C3 | x000000FF;
  const u32 x1C3CC32F = x3C3CC3FF & ~x204100D0;
  const u32 x4969967A = a1 ^ x1C3CC32F;

  // output 2
  *out2 ^= (x2FD00F5F & a5) ^ x4969967A;

  const u32 x4CC44CC4 = x4FE55EF4 & ~a2;
  const u32 x40C040C0 = x4CC44CC4 & ~a3;
  const u32 xC3C33C3C = ~x3C3CC3C3;
  const u32 x9669C396 = x55AAFFAA ^ xC3C33C3C;
  const u32 xD6A98356 = x40C040C0 ^ x9669C396;

  // output 1
  *out1 ^= (a5 & ~x0C840A00) ^ xD6A98356;

  const u32 xD6E9C3D6 = x40C040C0 | x9669C396;
  const u32 x4CEEEEC4 = x00AAAA00 | x4CC44CC4;
  const u32 x9A072D12 = xD6E9C3D6 ^ x4CEEEEC4;
  const u32 x001A000B = a4 & ~x4FE55EF4;
  const u32 x9A1F2D1B = x9A072D12 | x001A000B;

  // output 3
  *out3 ^= (a5 & ~x284100D8) ^ x9A1F2D1B;
}

// Bitslice DES S-box 4, expressed as a network of AND / OR / XOR / NOT gates.
// a1..a6 are the six input words. Every operation is bitwise, so each bit
// position of the u32 words is evaluated independently of the others.
// The four result words are XORed into *out1..*out4 (accumulated, not stored).
// NOTE(review): the xHHHHHHHH names appear to encode the truth table of the
// value they hold - keep names and gate order in sync with the upstream formulas.
DECLSPEC void s4 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  const u32 x5A5A5A5A = a1 ^ a3;
  const u32 x0F0FF0F0 = a3 ^ a5;
  const u32 x33FF33FF = a2 | a4;
  const u32 x33FFCC00 = a5 ^ x33FF33FF;
  const u32 x0C0030F0 = x0F0FF0F0 & ~x33FFCC00;
  const u32 x0C0CC0C0 = x0F0FF0F0 & ~a2;
  const u32 x0CF3C03F = a4 ^ x0C0CC0C0;
  const u32 x5EFBDA7F = x5A5A5A5A | x0CF3C03F;
  const u32 x52FBCA0F = x5EFBDA7F & ~x0C0030F0;
  const u32 x61C8F93C = a2 ^ x52FBCA0F;

  const u32 x00C0C03C = x0CF3C03F & x61C8F93C;
  const u32 x0F0F30C0 = x0F0FF0F0 & ~x00C0C03C;
  const u32 x3B92A366 = x5A5A5A5A ^ x61C8F93C;
  const u32 x30908326 = x3B92A366 & ~x0F0F30C0;
  const u32 x3C90B3D6 = x0C0030F0 ^ x30908326;

  const u32 x33CC33CC = a2 ^ a4;
  const u32 x0C0CFFFF = a5 | x0C0CC0C0;
  const u32 x379E5C99 = x3B92A366 ^ x0C0CFFFF;
  const u32 x04124C11 = x379E5C99 & ~x33CC33CC;
  const u32 x56E9861E = x52FBCA0F ^ x04124C11;

  // output 1
  *out1 ^= (a6 & ~x3C90B3D6) ^ x56E9861E;

  const u32 xA91679E1 = ~x56E9861E;

  // output 2
  *out2 ^= (x3C90B3D6 & ~a6) ^ xA91679E1;

  const u32 x9586CA37 = x3C90B3D6 ^ xA91679E1;
  const u32 x8402C833 = x9586CA37 & ~x33CC33CC;
  const u32 x84C2C83F = x00C0C03C | x8402C833;
  const u32 xB35C94A6 = x379E5C99 ^ x84C2C83F;

  // output 3
  *out3 ^= (x61C8F93C | a6) ^ xB35C94A6;

  // output 4 shares xB35C94A6 with output 3
  *out4 ^= (a6 & x61C8F93C) ^ xB35C94A6;
}

// Bitslice DES S-box 5, expressed as a network of AND / OR / XOR / NOT gates.
// a1..a6 are the six input words. Every operation is bitwise, so each bit
// position of the u32 words is evaluated independently of the others.
// The four result words are XORed into *out1..*out4 (accumulated, not stored).
// NOTE(review): the xHHHHHHHH names appear to encode the truth table of the
// value they hold - keep names and gate order in sync with the upstream formulas.
DECLSPEC void s5 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  const u32 x77777777 = a1 | a3;
  const u32 x77770000 = x77777777 & ~a6;
  const u32 x22225555 = a1 ^ x77770000;
  const u32 x11116666 = a3 ^ x22225555;
  const u32 x1F1F6F6F = a4 | x11116666;

  const u32 x70700000 = x77770000 & ~a4;
  const u32 x43433333 = a3 ^ x70700000;
  const u32 x00430033 = a5 & x43433333;
  const u32 x55557777 = a1 | x11116666;
  const u32 x55167744 = x00430033 ^ x55557777;
  const u32 x5A19784B = a4 ^ x55167744;

  const u32 x5A1987B4 = a6 ^ x5A19784B;
  const u32 x7A3BD7F5 = x22225555 | x5A1987B4;
  const u32 x003B00F5 = a5 & x7A3BD7F5;
  const u32 x221955A0 = x22225555 ^ x003B00F5;
  const u32 x05050707 = a4 & x55557777;
  const u32 x271C52A7 = x221955A0 ^ x05050707;

  const u32 x2A2A82A0 = x7A3BD7F5 & ~a1;
  const u32 x6969B193 = x43433333 ^ x2A2A82A0;
  const u32 x1FE06F90 = a5 ^ x1F1F6F6F;
  const u32 x16804E00 = x1FE06F90 & ~x6969B193;
  const u32 xE97FB1FF = ~x16804E00;

  // output 3
  *out3 ^= (xE97FB1FF & ~a2) ^ x5A19784B;

  const u32 x43403302 = x43433333 & ~x003B00F5;
  const u32 x35CAED30 = x2A2A82A0 ^ x1FE06F90;
  const u32 x37DEFFB7 = x271C52A7 | x35CAED30;
  const u32 x349ECCB5 = x37DEFFB7 & ~x43403302;
  const u32 x0B01234A = x1F1F6F6F & ~x349ECCB5;

  const u32 x101884B4 = x5A1987B4 & x349ECCB5;
  const u32 x0FF8EB24 = x1FE06F90 ^ x101884B4;
  const u32 x41413333 = x43433333 & x55557777;
  const u32 x4FF9FB37 = x0FF8EB24 | x41413333;
  const u32 x4FC2FBC2 = x003B00F5 ^ x4FF9FB37;

  // output 4
  *out4 ^= (x4FC2FBC2 & a2) ^ x271C52A7;

  const u32 x22222222 = a1 ^ x77777777;
  const u32 x16BCEE97 = x349ECCB5 ^ x22222222;
  const u32 x0F080B04 = a4 & x0FF8EB24;
  const u32 x19B4E593 = x16BCEE97 ^ x0F080B04;

  // output 1
  *out1 ^= (x0B01234A | a2) ^ x19B4E593;

  const u32 x5C5C5C5C = x1F1F6F6F ^ x43433333;
  const u32 x4448184C = x5C5C5C5C & ~x19B4E593;
  const u32 x2DDABE71 = x22225555 ^ x0FF8EB24;
  const u32 x6992A63D = x4448184C ^ x2DDABE71;

  // output 2
  *out2 ^= (x1F1F6F6F & a2) ^ x6992A63D;
}

// Bitslice DES S-box 6, expressed as a network of AND / OR / XOR / NOT gates.
// a1..a6 are the six input words. Every operation is bitwise, so each bit
// position of the u32 words is evaluated independently of the others.
// The four result words are XORed into *out1..*out4 (accumulated, not stored).
// NOTE(review): the xHHHHHHHH names appear to encode the truth table of the
// value they hold - keep names and gate order in sync with the upstream formulas.
DECLSPEC void s6 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  const u32 x33CC33CC = a2 ^ a5;

  const u32 x3333FFFF = a2 | a6;
  const u32 x11115555 = a1 & x3333FFFF;
  const u32 x22DD6699 = x33CC33CC ^ x11115555;
  const u32 x22DD9966 = a6 ^ x22DD6699;
  const u32 x00220099 = a5 & ~x22DD9966;

  const u32 x00551144 = a1 & x22DD9966;
  const u32 x33662277 = a2 ^ x00551144;
  const u32 x5A5A5A5A = a1 ^ a3;
  const u32 x7B7E7A7F = x33662277 | x5A5A5A5A;
  const u32 x59A31CE6 = x22DD6699 ^ x7B7E7A7F;

  const u32 x09030C06 = a3 & x59A31CE6;
  const u32 x09030000 = x09030C06 & ~a6;
  const u32 x336622FF = x00220099 | x33662277;
  const u32 x3A6522FF = x09030000 ^ x336622FF;

  // output 4
  *out4 ^= (x3A6522FF & a4) ^ x59A31CE6;

  const u32 x484D494C = a2 ^ x7B7E7A7F;
  const u32 x0000B6B3 = a6 & ~x484D494C;
  const u32 x0F0FB9BC = a3 ^ x0000B6B3;
  const u32 x00FC00F9 = a5 & ~x09030C06;
  const u32 x0FFFB9FD = x0F0FB9BC | x00FC00F9;

  const u32 x5DF75DF7 = a1 | x59A31CE6;
  const u32 x116600F7 = x336622FF & x5DF75DF7;
  const u32 x1E69B94B = x0F0FB9BC ^ x116600F7;
  const u32 x1668B94B = x1E69B94B & ~x09030000;

  // output 3
  *out3 ^= (x00220099 | a4) ^ x1668B94B;

  const u32 x7B7B7B7B = a2 | x5A5A5A5A;
  const u32 x411E5984 = x3A6522FF ^ x7B7B7B7B;
  const u32 x1FFFFDFD = x11115555 | x0FFFB9FD;
  const u32 x5EE1A479 = x411E5984 ^ x1FFFFDFD;

  const u32 x3CB4DFD2 = x22DD6699 ^ x1E69B94B;
  const u32 x004B002D = a5 & ~x3CB4DFD2;
  const u32 xB7B2B6B3 = ~x484D494C;
  const u32 xCCC9CDC8 = x7B7B7B7B ^ xB7B2B6B3;
  const u32 xCC82CDE5 = x004B002D ^ xCCC9CDC8;

  // output 2
  *out2 ^= (xCC82CDE5 & ~a4) ^ x5EE1A479;

  const u32 x0055EEBB = a6 ^ x00551144;
  const u32 x5A5AECE9 = a1 ^ x0F0FB9BC;
  const u32 x0050ECA9 = x0055EEBB & x5A5AECE9;
  const u32 xC5CAC1CE = x09030C06 ^ xCCC9CDC8;
  const u32 xC59A2D67 = x0050ECA9 ^ xC5CAC1CE;

  // output 1
  *out1 ^= (x0FFFB9FD & ~a4) ^ xC59A2D67;
}

// Bitslice DES S-box 7, expressed as a network of AND / OR / XOR / NOT gates.
// a1..a6 are the six input words. Every operation is bitwise, so each bit
// position of the u32 words is evaluated independently of the others.
// The four result words are XORed into *out1..*out4 (accumulated, not stored).
// NOTE(review): the xHHHHHHHH names appear to encode the truth table of the
// value they hold - keep names and gate order in sync with the upstream formulas.
DECLSPEC void s7 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  const u32 x0FF00FF0 = a4 ^ a5;
  const u32 x3CC33CC3 = a3 ^ x0FF00FF0;
  const u32 x00003CC3 = a6 & x3CC33CC3;
  const u32 x0F000F00 = a4 & x0FF00FF0;
  const u32 x5A555A55 = a2 ^ x0F000F00;
  const u32 x00001841 = x00003CC3 & x5A555A55;

  const u32 x00000F00 = a6 & x0F000F00;
  const u32 x33333C33 = a3 ^ x00000F00;
  const u32 x7B777E77 = x5A555A55 | x33333C33;
  const u32 x0FF0F00F = a6 ^ x0FF00FF0;
  const u32 x74878E78 = x7B777E77 ^ x0FF0F00F;

  // output 4
  *out4 ^= (a1 & ~x00001841) ^ x74878E78;

  const u32 x003C003C = a5 & ~x3CC33CC3;
  const u32 x5A7D5A7D = x5A555A55 | x003C003C;
  const u32 x333300F0 = x00003CC3 ^ x33333C33;
  const u32 x694E5A8D = x5A7D5A7D ^ x333300F0;

  const u32 x0FF0CCCC = x00003CC3 ^ x0FF0F00F;
  const u32 x000F0303 = a4 & ~x0FF0CCCC;
  const u32 x5A505854 = x5A555A55 & ~x000F0303;
  const u32 x33CC000F = a5 ^ x333300F0;
  const u32 x699C585B = x5A505854 ^ x33CC000F;

  const u32 x7F878F78 = x0F000F00 | x74878E78;
  const u32 x21101013 = a3 & x699C585B;
  const u32 x7F979F7B = x7F878F78 | x21101013;
  const u32 x30030CC0 = x3CC33CC3 & ~x0FF0F00F;
  const u32 x4F9493BB = x7F979F7B ^ x30030CC0;

  // output 1
  *out1 ^= (x4F9493BB & ~a1) ^ x694E5A8D;

  const u32 x6F9CDBFB = x699C585B | x4F9493BB;
  const u32 x0000DBFB = a6 & x6F9CDBFB;
  const u32 x00005151 = a2 & x0000DBFB;
  const u32 x26DAC936 = x694E5A8D ^ x4F9493BB;
  const u32 x26DA9867 = x00005151 ^ x26DAC936;

  const u32 x27DA9877 = x21101013 | x26DA9867;
  const u32 x27DA438C = x0000DBFB ^ x27DA9877;
  const u32 x2625C9C9 = a5 ^ x26DAC936;
  const u32 x27FFCBCD = x27DA438C | x2625C9C9;

  // output 3
  *out3 ^= (x27FFCBCD & a1) ^ x699C585B;

  const u32 x27FF1036 = x0000DBFB ^ x27FFCBCD;
  const u32 x27FF103E = x003C003C | x27FF1036;
  const u32 xB06B6C44 = ~x4F9493BB;
  const u32 x97947C7A = x27FF103E ^ xB06B6C44;

  // output 2
  *out2 ^= (x97947C7A & ~a1) ^ x26DA9867;
}

// Bitslice DES S-box 8, expressed as a network of AND / OR / XOR / NOT gates.
// a1..a6 are the six input words. Every operation is bitwise, so each bit
// position of the u32 words is evaluated independently of the others.
// The four result words are XORed into *out1..*out4 (accumulated, not stored).
// NOTE(review): the xHHHHHHHH names appear to encode the truth table of the
// value they hold - keep names and gate order in sync with the upstream formulas.
DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const u32 a5, const u32 a6, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2, PRIVATE_AS u32 *out3, PRIVATE_AS u32 *out4)
{
  const u32 x0C0C0C0C = a3 & ~a2;
  const u32 x0000F0F0 = a5 & ~a3;
  const u32 x00FFF00F = a4 ^ x0000F0F0;
  const u32 x00555005 = a1 & x00FFF00F;
  const u32 x00515001 = x00555005 & ~x0C0C0C0C;

  const u32 x33000330 = a2 & ~x00FFF00F;
  const u32 x77555775 = a1 | x33000330;
  const u32 x30303030 = a2 & ~a3;
  const u32 x3030CFCF = a5 ^ x30303030;
  const u32 x30104745 = x77555775 & x3030CFCF;
  const u32 x30555745 = x00555005 | x30104745;

  const u32 xFF000FF0 = ~x00FFF00F;
  const u32 xCF1048B5 = x30104745 ^ xFF000FF0;
  const u32 x080A080A = a3 & ~x77555775;
  const u32 xC71A40BF = xCF1048B5 ^ x080A080A;
  const u32 xCB164CB3 = x0C0C0C0C ^ xC71A40BF;

  // output 2
  *out2 ^= (x00515001 | a6) ^ xCB164CB3;

  const u32 x9E4319E6 = a1 ^ xCB164CB3;
  const u32 x000019E6 = a5 & x9E4319E6;
  const u32 xF429738C = a2 ^ xC71A40BF;
  const u32 xF4296A6A = x000019E6 ^ xF429738C;
  const u32 xC729695A = x33000330 ^ xF4296A6A;

  const u32 xC47C3D2F = x30555745 ^ xF4296A6A;
  const u32 xF77F3F3F = a2 | xC47C3D2F;
  const u32 x9E43E619 = a5 ^ x9E4319E6;
  const u32 x693CD926 = xF77F3F3F ^ x9E43E619;

  // output 3
  *out3 ^= (x30555745 & a6) ^ x693CD926;

  const u32 xF719A695 = x3030CFCF ^ xC729695A;
  const u32 xF4FF73FF = a4 | xF429738C;
  const u32 x03E6D56A = xF719A695 ^ xF4FF73FF;
  const u32 x56B3803F = a1 ^ x03E6D56A;

  // output 4
  *out4 ^= (x56B3803F & a6) ^ xC729695A;

  const u32 xF700A600 = xF719A695 & ~a4;
  const u32 x61008000 = x693CD926 & xF700A600;
  const u32 x03B7856B = x00515001 ^ x03E6D56A;
  const u32 x62B7056B = x61008000 ^ x03B7856B;

  // output 1
  *out1 ^= (x62B7056B | a6) ^ xC729695A;
}

#endif

// Exchange the two u32 values that the pointer expressions a and b refer to.
// The arguments are parenthesized so that compound pointer expressions
// (e.g. `p + 1`) are dereferenced as a whole. Each argument is evaluated
// more than once, so do not pass expressions with side effects.
#define SWAP(a, b) { const u32 tmp = *(a); *(a) = *(b); *(b) = tmp; }

// Exchange Dnn with D(nn+32) for nn = 00..31 via SWAP. The names D00..D63 must
// be in scope at the expansion site.
// NOTE(review): presumably swaps the two 32-word halves of the bitsliced data
// block - confirm against the caller.
#define DATASWAP                                                      \
  SWAP (D00, D32); SWAP (D01, D33); SWAP (D02, D34); SWAP (D03, D35); \
  SWAP (D04, D36); SWAP (D05, D37); SWAP (D06, D38); SWAP (D07, D39); \
  SWAP (D08, D40); SWAP (D09, D41); SWAP (D10, D42); SWAP (D11, D43); \
  SWAP (D12, D44); SWAP (D13, D45); SWAP (D14, D46); SWAP (D15, D47); \
  SWAP (D16, D48); SWAP (D17, D49); SWAP (D18, D50); SWAP (D19, D51); \
  SWAP (D20, D52); SWAP (D21, D53); SWAP (D22, D54); SWAP (D23, D55); \
  SWAP (D24, D56); SWAP (D25, D57); SWAP (D26, D58); SWAP (D27, D59); \
  SWAP (D28, D60); SWAP (D29, D61); SWAP (D30, D62); SWAP (D31, D63);

// KEYSETxy: each macro assigns the 48 names k00..k47 from a fixed selection of
// the 56 names K00..K55; all of these must be in scope at the expansion site.
// The body is a bare brace block, so a KEYSET macro can stand as a statement
// without a trailing semicolon.
// NOTE(review): these look like the 16 DES round subkey selections (pairs
// KEYSET0n / KEYSET1n for n = 0..7) - confirm against the caller before editing.
// The index tables must stay exactly as they are; a single swapped index
// changes the result without any compile-time diagnostic.
#define KEYSET00 { k00 = K08; k01 = K44; k02 = K29; k03 = K52; k04 = K42; k05 = K14; k06 = K28; k07 = K49; k08 = K01; k09 = K07; k10 = K16; k11 = K36; k12 = K02; k13 = K30; k14 = K22; k15 = K21; k16 = K38; k17 = K50; k18 = K51; k19 = K00; k20 = K31; k21 = K23; k22 = K15; k23 = K35; k24 = K19; k25 = K24; k26 = K34; k27 = K47; k28 = K32; k29 = K03; k30 = K41; k31 = K26; k32 = K04; k33 = K46; k34 = K20; k35 = K25; k36 = K53; k37 = K18; k38 = K33; k39 = K55; k40 = K13; k41 = K17; k42 = K39; k43 = K12; k44 = K11; k45 = K54; k46 = K48; k47 = K27; }
#define KEYSET10 { k00 = K49; k01 = K28; k02 = K45; k03 = K36; k04 = K01; k05 = K30; k06 = K44; k07 = K08; k08 = K42; k09 = K23; k10 = K00; k11 = K52; k12 = K43; k13 = K14; k14 = K38; k15 = K37; k16 = K22; k17 = K09; k18 = K35; k19 = K16; k20 = K15; k21 = K07; k22 = K31; k23 = K51; k24 = K03; k25 = K40; k26 = K46; k27 = K04; k28 = K20; k29 = K19; k30 = K53; k31 = K10; k32 = K47; k33 = K34; k34 = K32; k35 = K13; k36 = K41; k37 = K06; k38 = K17; k39 = K12; k40 = K25; k41 = K33; k42 = K27; k43 = K55; k44 = K54; k45 = K11; k46 = K05; k47 = K39; }
#define KEYSET01 { k00 = K01; k01 = K37; k02 = K22; k03 = K45; k04 = K35; k05 = K07; k06 = K21; k07 = K42; k08 = K51; k09 = K00; k10 = K09; k11 = K29; k12 = K52; k13 = K23; k14 = K15; k15 = K14; k16 = K31; k17 = K43; k18 = K44; k19 = K50; k20 = K49; k21 = K16; k22 = K08; k23 = K28; k24 = K12; k25 = K17; k26 = K27; k27 = K40; k28 = K25; k29 = K55; k30 = K34; k31 = K19; k32 = K24; k33 = K39; k34 = K13; k35 = K18; k36 = K46; k37 = K11; k38 = K26; k39 = K48; k40 = K06; k41 = K10; k42 = K32; k43 = K05; k44 = K04; k45 = K47; k46 = K41; k47 = K20; }
#define KEYSET11 { k00 = K35; k01 = K14; k02 = K31; k03 = K22; k04 = K44; k05 = K16; k06 = K30; k07 = K51; k08 = K28; k09 = K09; k10 = K43; k11 = K38; k12 = K29; k13 = K00; k14 = K49; k15 = K23; k16 = K08; k17 = K52; k18 = K21; k19 = K02; k20 = K01; k21 = K50; k22 = K42; k23 = K37; k24 = K48; k25 = K26; k26 = K32; k27 = K17; k28 = K06; k29 = K05; k30 = K39; k31 = K55; k32 = K33; k33 = K20; k34 = K18; k35 = K54; k36 = K27; k37 = K47; k38 = K03; k39 = K53; k40 = K11; k41 = K19; k42 = K13; k43 = K41; k44 = K40; k45 = K24; k46 = K46; k47 = K25; }
#define KEYSET02 { k00 = K44; k01 = K23; k02 = K08; k03 = K31; k04 = K21; k05 = K50; k06 = K07; k07 = K28; k08 = K37; k09 = K43; k10 = K52; k11 = K15; k12 = K38; k13 = K09; k14 = K01; k15 = K00; k16 = K42; k17 = K29; k18 = K30; k19 = K36; k20 = K35; k21 = K02; k22 = K51; k23 = K14; k24 = K53; k25 = K03; k26 = K13; k27 = K26; k28 = K11; k29 = K41; k30 = K20; k31 = K05; k32 = K10; k33 = K25; k34 = K54; k35 = K04; k36 = K32; k37 = K24; k38 = K12; k39 = K34; k40 = K47; k41 = K55; k42 = K18; k43 = K46; k44 = K17; k45 = K33; k46 = K27; k47 = K06; }
#define KEYSET12 { k00 = K21; k01 = K00; k02 = K42; k03 = K08; k04 = K30; k05 = K02; k06 = K16; k07 = K37; k08 = K14; k09 = K52; k10 = K29; k11 = K49; k12 = K15; k13 = K43; k14 = K35; k15 = K09; k16 = K51; k17 = K38; k18 = K07; k19 = K45; k20 = K44; k21 = K36; k22 = K28; k23 = K23; k24 = K34; k25 = K12; k26 = K18; k27 = K03; k28 = K47; k29 = K46; k30 = K25; k31 = K41; k32 = K19; k33 = K06; k34 = K04; k35 = K40; k36 = K13; k37 = K33; k38 = K48; k39 = K39; k40 = K24; k41 = K05; k42 = K54; k43 = K27; k44 = K26; k45 = K10; k46 = K32; k47 = K11; }
#define KEYSET03 { k00 = K30; k01 = K09; k02 = K51; k03 = K42; k04 = K07; k05 = K36; k06 = K50; k07 = K14; k08 = K23; k09 = K29; k10 = K38; k11 = K01; k12 = K49; k13 = K52; k14 = K44; k15 = K43; k16 = K28; k17 = K15; k18 = K16; k19 = K22; k20 = K21; k21 = K45; k22 = K37; k23 = K00; k24 = K39; k25 = K48; k26 = K54; k27 = K12; k28 = K24; k29 = K27; k30 = K06; k31 = K46; k32 = K55; k33 = K11; k34 = K40; k35 = K17; k36 = K18; k37 = K10; k38 = K53; k39 = K20; k40 = K33; k41 = K41; k42 = K04; k43 = K32; k44 = K03; k45 = K19; k46 = K13; k47 = K47; }
#define KEYSET13 { k00 = K07; k01 = K43; k02 = K28; k03 = K51; k04 = K16; k05 = K45; k06 = K02; k07 = K23; k08 = K00; k09 = K38; k10 = K15; k11 = K35; k12 = K01; k13 = K29; k14 = K21; k15 = K52; k16 = K37; k17 = K49; k18 = K50; k19 = K31; k20 = K30; k21 = K22; k22 = K14; k23 = K09; k24 = K20; k25 = K53; k26 = K04; k27 = K48; k28 = K33; k29 = K32; k30 = K11; k31 = K27; k32 = K05; k33 = K47; k34 = K17; k35 = K26; k36 = K54; k37 = K19; k38 = K34; k39 = K25; k40 = K10; k41 = K46; k42 = K40; k43 = K13; k44 = K12; k45 = K55; k46 = K18; k47 = K24; }
#define KEYSET04 { k00 = K16; k01 = K52; k02 = K37; k03 = K28; k04 = K50; k05 = K22; k06 = K36; k07 = K00; k08 = K09; k09 = K15; k10 = K49; k11 = K44; k12 = K35; k13 = K38; k14 = K30; k15 = K29; k16 = K14; k17 = K01; k18 = K02; k19 = K08; k20 = K07; k21 = K31; k22 = K23; k23 = K43; k24 = K25; k25 = K34; k26 = K40; k27 = K53; k28 = K10; k29 = K13; k30 = K47; k31 = K32; k32 = K41; k33 = K24; k34 = K26; k35 = K03; k36 = K04; k37 = K55; k38 = K39; k39 = K06; k40 = K19; k41 = K27; k42 = K17; k43 = K18; k44 = K48; k45 = K05; k46 = K54; k47 = K33; }
#define KEYSET14 { k00 = K50; k01 = K29; k02 = K14; k03 = K37; k04 = K02; k05 = K31; k06 = K45; k07 = K09; k08 = K43; k09 = K49; k10 = K01; k11 = K21; k12 = K44; k13 = K15; k14 = K07; k15 = K38; k16 = K23; k17 = K35; k18 = K36; k19 = K42; k20 = K16; k21 = K08; k22 = K00; k23 = K52; k24 = K06; k25 = K39; k26 = K17; k27 = K34; k28 = K19; k29 = K18; k30 = K24; k31 = K13; k32 = K46; k33 = K33; k34 = K03; k35 = K12; k36 = K40; k37 = K05; k38 = K20; k39 = K11; k40 = K55; k41 = K32; k42 = K26; k43 = K54; k44 = K53; k45 = K41; k46 = K04; k47 = K10; }
#define KEYSET05 { k00 = K02; k01 = K38; k02 = K23; k03 = K14; k04 = K36; k05 = K08; k06 = K22; k07 = K43; k08 = K52; k09 = K01; k10 = K35; k11 = K30; k12 = K21; k13 = K49; k14 = K16; k15 = K15; k16 = K00; k17 = K44; k18 = K45; k19 = K51; k20 = K50; k21 = K42; k22 = K09; k23 = K29; k24 = K11; k25 = K20; k26 = K26; k27 = K39; k28 = K55; k29 = K54; k30 = K33; k31 = K18; k32 = K27; k33 = K10; k34 = K12; k35 = K48; k36 = K17; k37 = K41; k38 = K25; k39 = K47; k40 = K05; k41 = K13; k42 = K03; k43 = K04; k44 = K34; k45 = K46; k46 = K40; k47 = K19; }
#define KEYSET15 { k00 = K36; k01 = K15; k02 = K00; k03 = K23; k04 = K45; k05 = K42; k06 = K31; k07 = K52; k08 = K29; k09 = K35; k10 = K44; k11 = K07; k12 = K30; k13 = K01; k14 = K50; k15 = K49; k16 = K09; k17 = K21; k18 = K22; k19 = K28; k20 = K02; k21 = K51; k22 = K43; k23 = K38; k24 = K47; k25 = K25; k26 = K03; k27 = K20; k28 = K05; k29 = K04; k30 = K10; k31 = K54; k32 = K32; k33 = K19; k34 = K48; k35 = K53; k36 = K26; k37 = K46; k38 = K06; k39 = K24; k40 = K41; k41 = K18; k42 = K12; k43 = K40; k44 = K39; k45 = K27; k46 = K17; k47 = K55; }
#define KEYSET06 { k00 = K45; k01 = K49; k02 = K09; k03 = K00; k04 = K22; k05 = K51; k06 = K08; k07 = K29; k08 = K38; k09 = K44; k10 = K21; k11 = K16; k12 = K07; k13 = K35; k14 = K02; k15 = K01; k16 = K43; k17 = K30; k18 = K31; k19 = K37; k20 = K36; k21 = K28; k22 = K52; k23 = K15; k24 = K24; k25 = K06; k26 = K12; k27 = K25; k28 = K41; k29 = K40; k30 = K19; k31 = K04; k32 = K13; k33 = K55; k34 = K53; k35 = K34; k36 = K03; k37 = K27; k38 = K11; k39 = K33; k40 = K46; k41 = K54; k42 = K48; k43 = K17; k44 = K20; k45 = K32; k46 = K26; k47 = K05; }
#define KEYSET16 { k00 = K22; k01 = K01; k02 = K43; k03 = K09; k04 = K31; k05 = K28; k06 = K42; k07 = K38; k08 = K15; k09 = K21; k10 = K30; k11 = K50; k12 = K16; k13 = K44; k14 = K36; k15 = K35; k16 = K52; k17 = K07; k18 = K08; k19 = K14; k20 = K45; k21 = K37; k22 = K29; k23 = K49; k24 = K33; k25 = K11; k26 = K48; k27 = K06; k28 = K46; k29 = K17; k30 = K55; k31 = K40; k32 = K18; k33 = K05; k34 = K34; k35 = K39; k36 = K12; k37 = K32; k38 = K47; k39 = K10; k40 = K27; k41 = K04; k42 = K53; k43 = K26; k44 = K25; k45 = K13; k46 = K03; k47 = K41; }
#define KEYSET07 { k00 = K31; k01 = K35; k02 = K52; k03 = K43; k04 = K08; k05 = K37; k06 = K51; k07 = K15; k08 = K49; k09 = K30; k10 = K07; k11 = K02; k12 = K50; k13 = K21; k14 = K45; k15 = K44; k16 = K29; k17 = K16; k18 = K42; k19 = K23; k20 = K22; k21 = K14; k22 = K38; k23 = K01; k24 = K10; k25 = K47; k26 = K53; k27 = K11; k28 = K27; k29 = K26; k30 = K05; k31 = K17; k32 = K54; k33 = K41; k34 = K39; k35 = K20; k36 = K48; k37 = K13; k38 = K24; k39 = K19; k40 = K32; k41 = K40; k42 = K34; k43 = K03; k44 = K06; k45 = K18; k46 = K12; k47 = K46; }
#define KEYSET17 { k00 = K15; k01 = K51; k02 = K36; k03 = K02; k04 = K49; k05 = K21; k06 = K35; k07 = K31; k08 = K08; k09 = K14; k10 = K23; k11 = K43; k12 = K09; k13 = K37; k14 = K29; k15 = K28; k16 = K45; k17 = K00; k18 = K01; k19 = K07; k20 = K38; k21 = K30; k22 = K22; k23 = K42; k24 = K26; k25 = K04; k26 = K41; k27 = K54; k28 = K39; k29 = K10; k30 = K48; k31 = K33; k32 = K11; k33 = K53; k34 = K27; k35 = K32; k36 = K05; k37 = K25; k38 = K40; k39 = K03; k40 = K20; k41 = K24; k42 = K46; k43 = K19; k44 = K18; k45 = K06; k46 = K55; k47 = K34; }

// Select between two values: yields the second argument when the third
// (the condition) is non-zero, otherwise the first argument.
#define myselx(if_clear,if_set,cond) ((cond) ? (if_set) : (if_clear))

#ifdef DESCRYPT_SALT

/**
 * DESCrypt - variant compiled when DESCRYPT_SALT is defined at build time.
 *
 * Runs 25 passes over the 64 data words D00..D63; each pass consists of 16
 * S-box rounds (two inner iterations of eight rounds) followed by DATASWAP.
 * One additional DATASWAP is applied after the last pass.
 * (Presumably the 25 DES iterations of traditional crypt(3) - confirm.)
 *
 * SALT      not referenced directly in this body; the twelve salt flags are
 *           derived from the compile-time constant DESCRYPT_SALT instead.
 * K00..K55  the 56 key inputs; the KEYSETxy macros pick 48 of them per round
 *           into the working words k00..k47.
 * D00..D63  data words, read by dereference and updated in place through the
 *           output pointers handed to s1..s8.
 *
 * NOTE(review): the file header describes the S-boxes as bitslice, so each u32
 * presumably carries one bit position for 32 independent candidates - confirm
 * against the caller.
 * DATASWAP and KEYSET00..KEYSET15 are defined outside this function.
 */
DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, PRIVATE_AS u32 *D00, PRIVATE_AS u32 *D01, PRIVATE_AS u32 *D02, PRIVATE_AS u32 *D03, PRIVATE_AS u32 *D04, PRIVATE_AS u32 *D05, PRIVATE_AS u32 *D06, PRIVATE_AS u32 *D07, PRIVATE_AS u32 *D08, PRIVATE_AS u32 *D09, PRIVATE_AS u32 *D10, PRIVATE_AS u32 *D11, PRIVATE_AS u32 *D12, PRIVATE_AS u32 *D13, PRIVATE_AS u32 *D14, PRIVATE_AS u32 *D15, PRIVATE_AS u32 *D16, PRIVATE_AS u32 *D17, PRIVATE_AS u32 *D18, PRIVATE_AS u32 *D19, PRIVATE_AS u32 *D20, PRIVATE_AS u32 *D21, PRIVATE_AS u32 *D22, PRIVATE_AS u32 *D23, PRIVATE_AS u32 *D24, PRIVATE_AS u32 *D25, PRIVATE_AS u32 *D26, PRIVATE_AS u32 *D27, PRIVATE_AS u32 *D28, PRIVATE_AS u32 *D29, PRIVATE_AS u32 *D30, PRIVATE_AS u32 *D31, PRIVATE_AS u32 *D32, PRIVATE_AS u32 *D33, PRIVATE_AS u32 *D34, PRIVATE_AS u32 *D35, PRIVATE_AS u32 *D36, PRIVATE_AS u32 *D37, PRIVATE_AS u32 *D38, PRIVATE_AS u32 *D39, PRIVATE_AS u32 *D40, PRIVATE_AS u32 *D41, PRIVATE_AS u32 *D42, PRIVATE_AS u32 *D43, PRIVATE_AS u32 *D44, PRIVATE_AS u32 *D45, PRIVATE_AS u32 *D46, PRIVATE_AS u32 *D47, PRIVATE_AS u32 *D48, PRIVATE_AS u32 *D49, PRIVATE_AS u32 *D50, PRIVATE_AS u32 *D51, PRIVATE_AS u32 *D52, 
PRIVATE_AS u32 *D53, PRIVATE_AS u32 *D54, PRIVATE_AS u32 *D55, PRIVATE_AS u32 *D56, PRIVATE_AS u32 *D57, PRIVATE_AS u32 *D58, PRIVATE_AS u32 *D59, PRIVATE_AS u32 *D60, PRIVATE_AS u32 *D61, PRIVATE_AS u32 *D62, PRIVATE_AS u32 *D63)
{
  // One 0/1 flag per salt bit (12 bits), taken from the compile-time salt.
  // s001..s020 steer the inputs of s1 and s5, s040..s800 those of s2 and s6.
  const u32 s001 = (0x001 & DESCRYPT_SALT) ? 1 : 0;
  const u32 s002 = (0x002 & DESCRYPT_SALT) ? 1 : 0;
  const u32 s004 = (0x004 & DESCRYPT_SALT) ? 1 : 0;
  const u32 s008 = (0x008 & DESCRYPT_SALT) ? 1 : 0;
  const u32 s010 = (0x010 & DESCRYPT_SALT) ? 1 : 0;
  const u32 s020 = (0x020 & DESCRYPT_SALT) ? 1 : 0;
  const u32 s040 = (0x040 & DESCRYPT_SALT) ? 1 : 0;
  const u32 s080 = (0x080 & DESCRYPT_SALT) ? 1 : 0;
  const u32 s100 = (0x100 & DESCRYPT_SALT) ? 1 : 0;
  const u32 s200 = (0x200 & DESCRYPT_SALT) ? 1 : 0;
  const u32 s400 = (0x400 & DESCRYPT_SALT) ? 1 : 0;
  const u32 s800 = (0x800 & DESCRYPT_SALT) ? 1 : 0;

  // Working round-key words, reloaded by a KEYSETxy macro before every round.
  // KXX_DECL expands to nothing in the definitions at the top of this file.
  KXX_DECL u32 k00, k01, k02, k03, k04, k05;
  KXX_DECL u32 k06, k07, k08, k09, k10, k11;
  KXX_DECL u32 k12, k13, k14, k15, k16, k17;
  KXX_DECL u32 k18, k19, k20, k21, k22, k23;
  KXX_DECL u32 k24, k25, k26, k27, k28, k29;
  KXX_DECL u32 k30, k31, k32, k33, k34, k35;
  KXX_DECL u32 k36, k37, k38, k39, k40, k41;
  KXX_DECL u32 k42, k43, k44, k45, k46, k47;

  // 25 passes of 16 rounds each.
  for (u32 ii = 0; ii < 25; ii++)
  {
    #ifdef _unroll
    #pragma unroll
    #endif
    // Two groups of eight rounds; i picks KEYSET0y (i == 0) or KEYSET1y (i == 1).
    for (u32 i = 0; i < 2; i++)
    {
      if (i) KEYSET10 else KEYSET00

      // Rounds at even positions read D32..D63 and deliver results to D00..D31.
      // For s1/s2/s5/s6 a set salt flag makes myselx pick the second word, so
      // s1 and s5 (likewise s2 and s6) exchange their input words for that bit.
      s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
      s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
      s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
      s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
      s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
      s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
      s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
      s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);

      if (i) KEYSET11 else KEYSET01

      // Rounds at odd positions mirror the above with the halves exchanged:
      // they read D00..D31 and deliver results to D32..D63.
      s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
      s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
      s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
      s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
      s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
      s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
      s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
      s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);

      // The remaining six rounds repeat the two patterns above; only the
      // KEYSETxy macro applied before each round differs.
      if (i) KEYSET12 else KEYSET02

      s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
      s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
      s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
      s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
      s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
      s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
      s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
      s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);

      if (i) KEYSET13 else KEYSET03

      s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
      s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
      s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
      s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
      s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
      s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
      s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
      s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);

      if (i) KEYSET14 else KEYSET04

      s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
      s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
      s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
      s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
      s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
      s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
      s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
      s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);

      if (i) KEYSET15 else KEYSET05

      s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
      s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
      s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
      s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
      s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
      s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
      s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
      s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);

      if (i) KEYSET16 else KEYSET06

      s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
      s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
      s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
      s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
      s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
      s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
      s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
      s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);

      if (i) KEYSET17 else KEYSET07

      s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
      s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
      s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
      s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
      s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
      s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
      s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
      s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);
    }

    // Applied after every 16-round pass; DATASWAP is defined elsewhere in the
    // file (presumably exchanges the two 32-word halves - confirm).
    DATASWAP;
  }

  // One more DATASWAP after the final pass.
  DATASWAP;
}

#else

DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, PRIVATE_AS u32 *D00, PRIVATE_AS u32 *D01, PRIVATE_AS u32 *D02, PRIVATE_AS u32 *D03, PRIVATE_AS u32 *D04, PRIVATE_AS u32 *D05, PRIVATE_AS u32 *D06, PRIVATE_AS u32 *D07, PRIVATE_AS u32 *D08, PRIVATE_AS u32 *D09, PRIVATE_AS u32 *D10, PRIVATE_AS u32 *D11, PRIVATE_AS u32 *D12, PRIVATE_AS u32 *D13, PRIVATE_AS u32 *D14, PRIVATE_AS u32 *D15, PRIVATE_AS u32 *D16, PRIVATE_AS u32 *D17, PRIVATE_AS u32 *D18, PRIVATE_AS u32 *D19, PRIVATE_AS u32 *D20, PRIVATE_AS u32 *D21, PRIVATE_AS u32 *D22, PRIVATE_AS u32 *D23, PRIVATE_AS u32 *D24, PRIVATE_AS u32 *D25, PRIVATE_AS u32 *D26, PRIVATE_AS u32 *D27, PRIVATE_AS u32 *D28, PRIVATE_AS u32 *D29, PRIVATE_AS u32 *D30, PRIVATE_AS u32 *D31, PRIVATE_AS u32 *D32, PRIVATE_AS u32 *D33, PRIVATE_AS u32 *D34, PRIVATE_AS u32 *D35, PRIVATE_AS u32 *D36, PRIVATE_AS u32 *D37, PRIVATE_AS u32 *D38, PRIVATE_AS u32 *D39, PRIVATE_AS u32 *D40, PRIVATE_AS u32 *D41, PRIVATE_AS u32 *D42, PRIVATE_AS u32 *D43, PRIVATE_AS u32 *D44, PRIVATE_AS u32 *D45, PRIVATE_AS u32 *D46, PRIVATE_AS u32 *D47, PRIVATE_AS u32 *D48, PRIVATE_AS u32 *D49, PRIVATE_AS u32 *D50, PRIVATE_AS u32 *D51, PRIVATE_AS u32 *D52, 
PRIVATE_AS u32 *D53, PRIVATE_AS u32 *D54, PRIVATE_AS u32 *D55, PRIVATE_AS u32 *D56, PRIVATE_AS u32 *D57, PRIVATE_AS u32 *D58, PRIVATE_AS u32 *D59, PRIVATE_AS u32 *D60, PRIVATE_AS u32 *D61, PRIVATE_AS u32 *D62, PRIVATE_AS u32 *D63)
{
  const u32 s001 = (0x001 & SALT) ? 1 : 0;
  const u32 s002 = (0x002 & SALT) ? 1 : 0;
  const u32 s004 = (0x004 & SALT) ? 1 : 0;
  const u32 s008 = (0x008 & SALT) ? 1 : 0;
  const u32 s010 = (0x010 & SALT) ? 1 : 0;
  const u32 s020 = (0x020 & SALT) ? 1 : 0;
  const u32 s040 = (0x040 & SALT) ? 1 : 0;
  const u32 s080 = (0x080 & SALT) ? 1 : 0;
  const u32 s100 = (0x100 & SALT) ? 1 : 0;
  const u32 s200 = (0x200 & SALT) ? 1 : 0;
  const u32 s400 = (0x400 & SALT) ? 1 : 0;
  const u32 s800 = (0x800 & SALT) ? 1 : 0;

  KXX_DECL u32 k00, k01, k02, k03, k04, k05;
  KXX_DECL u32 k06, k07, k08, k09, k10, k11;
  KXX_DECL u32 k12, k13, k14, k15, k16, k17;
  KXX_DECL u32 k18, k19, k20, k21, k22, k23;
  KXX_DECL u32 k24, k25, k26, k27, k28, k29;
  KXX_DECL u32 k30, k31, k32, k33, k34, k35;
  KXX_DECL u32 k36, k37, k38, k39, k40, k41;
  KXX_DECL u32 k42, k43, k44, k45, k46, k47;

  for (u32 ii = 0; ii < 25; ii++)
  {
    #ifdef _unroll
    #pragma unroll
    #endif
    for (u32 i = 0; i < 2; i++)
    {
      if (i) KEYSET10 else KEYSET00

      s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
      s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
      s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
      s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
      s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
      s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
      s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
      s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);

      if (i) KEYSET11 else KEYSET01

      s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
      s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
      s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
      s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
      s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
      s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
      s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
      s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);

      if (i) KEYSET12 else KEYSET02

      s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
      s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
      s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
      s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
      s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
      s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
      s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
      s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);

      if (i) KEYSET13 else KEYSET03

      s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
      s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
      s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
      s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
      s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
      s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
      s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
      s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);

      if (i) KEYSET14 else KEYSET04

      s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
      s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
      s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
      s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
      s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
      s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
      s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
      s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);

      if (i) KEYSET15 else KEYSET05

      s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
      s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
      s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
      s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
      s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
      s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
      s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
      s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);

      if (i) KEYSET16 else KEYSET06

      s1(myselx (*D63, *D47, s001) ^ k00, myselx (*D32, *D48, s002) ^ k01, myselx (*D33, *D49, s004) ^ k02, myselx (*D34, *D50, s008) ^ k03, myselx (*D35, *D51, s010) ^ k04, myselx (*D36, *D52, s020) ^ k05, D08, D16, D22, D30);
      s2(myselx (*D35, *D51, s040) ^ k06, myselx (*D36, *D52, s080) ^ k07, myselx (*D37, *D53, s100) ^ k08, myselx (*D38, *D54, s200) ^ k09, myselx (*D39, *D55, s400) ^ k10, myselx (*D40, *D56, s800) ^ k11, D12, D27, D01, D17);
      s3(              *D39        ^ k12,               *D40        ^ k13,               *D41        ^ k14,               *D42        ^ k15,               *D43        ^ k16,               *D44        ^ k17, D23, D15, D29, D05);
      s4(              *D43        ^ k18,               *D44        ^ k19,               *D45        ^ k20,               *D46        ^ k21,               *D47        ^ k22,               *D48        ^ k23, D25, D19, D09, D00);
      s5(myselx (*D47, *D63, s001) ^ k24, myselx (*D48, *D32, s002) ^ k25, myselx (*D49, *D33, s004) ^ k26, myselx (*D50, *D34, s008) ^ k27, myselx (*D51, *D35, s010) ^ k28, myselx (*D52, *D36, s020) ^ k29, D07, D13, D24, D02);
      s6(myselx (*D51, *D35, s040) ^ k30, myselx (*D52, *D36, s080) ^ k31, myselx (*D53, *D37, s100) ^ k32, myselx (*D54, *D38, s200) ^ k33, myselx (*D55, *D39, s400) ^ k34, myselx (*D56, *D40, s800) ^ k35, D03, D28, D10, D18);
      s7(              *D55        ^ k36,               *D56        ^ k37,               *D57        ^ k38,               *D58        ^ k39,               *D59        ^ k40,               *D60        ^ k41, D31, D11, D21, D06);
      s8(              *D59        ^ k42,               *D60        ^ k43,               *D61        ^ k44,               *D62        ^ k45,               *D63        ^ k46,               *D32        ^ k47, D04, D26, D14, D20);

      if (i) KEYSET17 else KEYSET07

      s1(myselx (*D31, *D15, s001) ^ k00, myselx (*D00, *D16, s002) ^ k01, myselx (*D01, *D17, s004) ^ k02, myselx (*D02, *D18, s008) ^ k03, myselx (*D03, *D19, s010) ^ k04, myselx (*D04, *D20, s020) ^ k05, D40, D48, D54, D62);
      s2(myselx (*D03, *D19, s040) ^ k06, myselx (*D04, *D20, s080) ^ k07, myselx (*D05, *D21, s100) ^ k08, myselx (*D06, *D22, s200) ^ k09, myselx (*D07, *D23, s400) ^ k10, myselx (*D08, *D24, s800) ^ k11, D44, D59, D33, D49);
      s3(              *D07        ^ k12,               *D08        ^ k13,               *D09        ^ k14,               *D10        ^ k15,               *D11        ^ k16,               *D12        ^ k17, D55, D47, D61, D37);
      s4(              *D11        ^ k18,               *D12        ^ k19,               *D13        ^ k20,               *D14        ^ k21,               *D15        ^ k22,               *D16        ^ k23, D57, D51, D41, D32);
      s5(myselx (*D15, *D31, s001) ^ k24, myselx (*D16, *D00, s002) ^ k25, myselx (*D17, *D01, s004) ^ k26, myselx (*D18, *D02, s008) ^ k27, myselx (*D19, *D03, s010) ^ k28, myselx (*D20, *D04, s020) ^ k29, D39, D45, D56, D34);
      s6(myselx (*D19, *D03, s040) ^ k30, myselx (*D20, *D04, s080) ^ k31, myselx (*D21, *D05, s100) ^ k32, myselx (*D22, *D06, s200) ^ k33, myselx (*D23, *D07, s400) ^ k34, myselx (*D24, *D08, s800) ^ k35, D35, D60, D42, D50);
      s7(              *D23        ^ k36,               *D24        ^ k37,               *D25        ^ k38,               *D26        ^ k39,               *D27        ^ k40,               *D28        ^ k41, D63, D43, D53, D38);
      s8(              *D27        ^ k42,               *D28        ^ k43,               *D29        ^ k44,               *D30        ^ k45,               *D31        ^ k46,               *D00        ^ k47, D36, D58, D46, D52);
    }

    DATASWAP;
  }

  DATASWAP;
}

#endif

/**
 * In-place transpose of a 32x32 bit matrix held as 32 words (data[0..31]).
 *
 * Works in five passes with pair distances 16, 8, 4, 2 and 1. In every pass
 * each word whose index has the distance bit clear is paired with the word
 * `distance` positions above it, and the two exchange the bit groups selected
 * by the pass mask.
 */
DECLSPEC void transpose32c (PRIVATE_AS u32 *data)
{
  // swap(x,y,j,m): exchange the bits of x selected by m with the bits of y
  // located j positions higher. Expands to three statements and relies on the
  // `u32 t` scratch variable declared below. The macro is not #undef'd here,
  // so it remains defined for the rest of the translation unit.
  #define swap(x,y,j,m)               \
     t  = ((x) ^ ((y) >> (j))) & (m); \
    (x) = (x) ^ t;                    \
    (y) = (y) ^ (t << (j));

  u32 t;

  // pass masks, in order: 0x0000ffff, 0x00ff00ff, 0x0f0f0f0f, 0x33333333, 0x55555555
  u32 mask = 0x0000ffff;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (int dist = 16; dist > 0; dist >>= 1)
  {
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int lo = 0; lo < 32; lo++)
    {
      // words with the distance bit set are the upper partner of some pair
      if (lo & dist) continue;

      // braces are required around this call: the macro is three statements
      swap (data[lo], data[lo + dist], dist, mask);
    }

    // derive the next pass mask by halving every run of set bits
    mask ^= mask << (dist >> 1);
  }
}

//
// transpose bitslice mod : attention race conditions, need different buffers for *in and *out
//

/**
 * Bitslice transpose of the mod[] words into words_buf_b.
 *
 * Every work-item reads one word from mod[gid], shifts each of its bytes left
 * by one and scatters 7 bits per byte (28 bits in total) into
 * words_buf_b[gid / 32].b[0..27], each at bit position gid % 32. The bits are
 * merged with atomic ORs because 32 work-items write into the same words.
 */
KERNEL_FQ void m01500_tm (KERN_ATTR_TM)
{
  const u64 gid = get_global_id (0);

  // bounds check is disabled in this kernel:
  // if (gid >= GID_CNT) return;

  // 32 consecutive work-items share one output entry; each owns one bit column
  const u32 column = gid % 32;
  const u32 group  = gid / 32;

  // shift every byte left by one; the mask clears bit 0 of each byte
  const u32 shifted = (mod[gid] << 1) & 0xfefefefe;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (int byte_idx = 0; byte_idx < 4; byte_idx++)
  {
    const int src_base = byte_idx * 8; // first bit of this byte in the input word
    const int dst_base = byte_idx * 7; // first output plane fed by this byte

    // planes are filled from bit 7 of the byte down to bit 1
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int n = 0; n < 7; n++)
    {
      const u32 bit = (shifted >> (src_base + 7 - n)) & 1;

      hc_atomic_or (&words_buf_b[group].b[dst_base + n], bit << column);
    }
  }
}

#ifndef DESCRYPT_SALT

// Empty stub: this definition is the one compiled when DESCRYPT_SALT is not
// defined. The m01500_sxx with a real body is in the #else branch of this
// #ifndef DESCRYPT_SALT section.
KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
{
}

/**
 * Multi-digest bitslice kernel (the variant compiled when DESCRYPT_SALT is not defined).
 *
 * Per work-item: two candidate words are read from pws[gid] and expanded into
 * 56 key lanes (K00..K55), each either all-ones or zero. For every step of the
 * inner loop (il_pos advances by 32, one position per bit of a u32) the bit
 * planes stored in words_buf_s are OR-ed into the first 28 lanes, DESCrypt is
 * run, and the 64 resulting planes D00..D63 are compared against the digests:
 *
 *  - DIGESTS_CNT < 16: every digest is compared plane by plane
 *  - otherwise:        the planes are transposed back into one (r0, r1) pair per bit position
 *
 * The actual hit handling is done by the textually included COMPARE_M, which
 * sees the locals of this function.
 */
KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ())
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  if (gid >= GID_CNT) return;

  /**
   * salt
   */

  const u32 salt = salt_bufs[SALT_POS_HOST].salt_buf[0];

  /**
   * base
   */

  const u32 w0 = pws[gid].i[0];
  const u32 w1 = pws[gid].i[1];

  // shift every byte left by one; the mask clears bit 0 of each byte
  const u32 w0s = (w0 << 1) & 0xfefefefe;
  const u32 w1s = (w1 << 1) & 0xfefefefe;

  // K00..K55: one key lane per candidate bit, -1 (all bits set) if the bit is
  // set, 0 otherwise. Within each byte the bits are taken from position 7 down
  // to position 1. These macros are not #undef'd after use.
  #define K00 (((w0s >> ( 0 + 7)) & 1) ? -1 : 0)
  #define K01 (((w0s >> ( 0 + 6)) & 1) ? -1 : 0)
  #define K02 (((w0s >> ( 0 + 5)) & 1) ? -1 : 0)
  #define K03 (((w0s >> ( 0 + 4)) & 1) ? -1 : 0)
  #define K04 (((w0s >> ( 0 + 3)) & 1) ? -1 : 0)
  #define K05 (((w0s >> ( 0 + 2)) & 1) ? -1 : 0)
  #define K06 (((w0s >> ( 0 + 1)) & 1) ? -1 : 0)
  #define K07 (((w0s >> ( 8 + 7)) & 1) ? -1 : 0)
  #define K08 (((w0s >> ( 8 + 6)) & 1) ? -1 : 0)
  #define K09 (((w0s >> ( 8 + 5)) & 1) ? -1 : 0)
  #define K10 (((w0s >> ( 8 + 4)) & 1) ? -1 : 0)
  #define K11 (((w0s >> ( 8 + 3)) & 1) ? -1 : 0)
  #define K12 (((w0s >> ( 8 + 2)) & 1) ? -1 : 0)
  #define K13 (((w0s >> ( 8 + 1)) & 1) ? -1 : 0)
  #define K14 (((w0s >> (16 + 7)) & 1) ? -1 : 0)
  #define K15 (((w0s >> (16 + 6)) & 1) ? -1 : 0)
  #define K16 (((w0s >> (16 + 5)) & 1) ? -1 : 0)
  #define K17 (((w0s >> (16 + 4)) & 1) ? -1 : 0)
  #define K18 (((w0s >> (16 + 3)) & 1) ? -1 : 0)
  #define K19 (((w0s >> (16 + 2)) & 1) ? -1 : 0)
  #define K20 (((w0s >> (16 + 1)) & 1) ? -1 : 0)
  #define K21 (((w0s >> (24 + 7)) & 1) ? -1 : 0)
  #define K22 (((w0s >> (24 + 6)) & 1) ? -1 : 0)
  #define K23 (((w0s >> (24 + 5)) & 1) ? -1 : 0)
  #define K24 (((w0s >> (24 + 4)) & 1) ? -1 : 0)
  #define K25 (((w0s >> (24 + 3)) & 1) ? -1 : 0)
  #define K26 (((w0s >> (24 + 2)) & 1) ? -1 : 0)
  #define K27 (((w0s >> (24 + 1)) & 1) ? -1 : 0)
  #define K28 (((w1s >> ( 0 + 7)) & 1) ? -1 : 0)
  #define K29 (((w1s >> ( 0 + 6)) & 1) ? -1 : 0)
  #define K30 (((w1s >> ( 0 + 5)) & 1) ? -1 : 0)
  #define K31 (((w1s >> ( 0 + 4)) & 1) ? -1 : 0)
  #define K32 (((w1s >> ( 0 + 3)) & 1) ? -1 : 0)
  #define K33 (((w1s >> ( 0 + 2)) & 1) ? -1 : 0)
  #define K34 (((w1s >> ( 0 + 1)) & 1) ? -1 : 0)
  #define K35 (((w1s >> ( 8 + 7)) & 1) ? -1 : 0)
  #define K36 (((w1s >> ( 8 + 6)) & 1) ? -1 : 0)
  #define K37 (((w1s >> ( 8 + 5)) & 1) ? -1 : 0)
  #define K38 (((w1s >> ( 8 + 4)) & 1) ? -1 : 0)
  #define K39 (((w1s >> ( 8 + 3)) & 1) ? -1 : 0)
  #define K40 (((w1s >> ( 8 + 2)) & 1) ? -1 : 0)
  #define K41 (((w1s >> ( 8 + 1)) & 1) ? -1 : 0)
  #define K42 (((w1s >> (16 + 7)) & 1) ? -1 : 0)
  #define K43 (((w1s >> (16 + 6)) & 1) ? -1 : 0)
  #define K44 (((w1s >> (16 + 5)) & 1) ? -1 : 0)
  #define K45 (((w1s >> (16 + 4)) & 1) ? -1 : 0)
  #define K46 (((w1s >> (16 + 3)) & 1) ? -1 : 0)
  #define K47 (((w1s >> (16 + 2)) & 1) ? -1 : 0)
  #define K48 (((w1s >> (16 + 1)) & 1) ? -1 : 0)
  #define K49 (((w1s >> (24 + 7)) & 1) ? -1 : 0)
  #define K50 (((w1s >> (24 + 6)) & 1) ? -1 : 0)
  #define K51 (((w1s >> (24 + 5)) & 1) ? -1 : 0)
  #define K52 (((w1s >> (24 + 4)) & 1) ? -1 : 0)
  #define K53 (((w1s >> (24 + 3)) & 1) ? -1 : 0)
  #define K54 (((w1s >> (24 + 2)) & 1) ? -1 : 0)
  #define K55 (((w1s >> (24 + 1)) & 1) ? -1 : 0)

  /**
   * inner loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += 32)
  {
    // lanes 0..27 start from the candidate bits of w0 ...
    u32 k00 = K00;
    u32 k01 = K01;
    u32 k02 = K02;
    u32 k03 = K03;
    u32 k04 = K04;
    u32 k05 = K05;
    u32 k06 = K06;
    u32 k07 = K07;
    u32 k08 = K08;
    u32 k09 = K09;
    u32 k10 = K10;
    u32 k11 = K11;
    u32 k12 = K12;
    u32 k13 = K13;
    u32 k14 = K14;
    u32 k15 = K15;
    u32 k16 = K16;
    u32 k17 = K17;
    u32 k18 = K18;
    u32 k19 = K19;
    u32 k20 = K20;
    u32 k21 = K21;
    u32 k22 = K22;
    u32 k23 = K23;
    u32 k24 = K24;
    u32 k25 = K25;
    u32 k26 = K26;
    u32 k27 = K27;

    // one words_buf_s entry per group of 32 inner-loop positions
    const u32 pc_pos = il_pos / 32;

    // ... and get the stored bit planes of this group OR-ed in
    k00 |= words_buf_s[pc_pos].b[ 0];
    k01 |= words_buf_s[pc_pos].b[ 1];
    k02 |= words_buf_s[pc_pos].b[ 2];
    k03 |= words_buf_s[pc_pos].b[ 3];
    k04 |= words_buf_s[pc_pos].b[ 4];
    k05 |= words_buf_s[pc_pos].b[ 5];
    k06 |= words_buf_s[pc_pos].b[ 6];
    k07 |= words_buf_s[pc_pos].b[ 7];
    k08 |= words_buf_s[pc_pos].b[ 8];
    k09 |= words_buf_s[pc_pos].b[ 9];
    k10 |= words_buf_s[pc_pos].b[10];
    k11 |= words_buf_s[pc_pos].b[11];
    k12 |= words_buf_s[pc_pos].b[12];
    k13 |= words_buf_s[pc_pos].b[13];
    k14 |= words_buf_s[pc_pos].b[14];
    k15 |= words_buf_s[pc_pos].b[15];
    k16 |= words_buf_s[pc_pos].b[16];
    k17 |= words_buf_s[pc_pos].b[17];
    k18 |= words_buf_s[pc_pos].b[18];
    k19 |= words_buf_s[pc_pos].b[19];
    k20 |= words_buf_s[pc_pos].b[20];
    k21 |= words_buf_s[pc_pos].b[21];
    k22 |= words_buf_s[pc_pos].b[22];
    k23 |= words_buf_s[pc_pos].b[23];
    k24 |= words_buf_s[pc_pos].b[24];
    k25 |= words_buf_s[pc_pos].b[25];
    k26 |= words_buf_s[pc_pos].b[26];
    k27 |= words_buf_s[pc_pos].b[27];

    // 64 data planes, all starting at zero
    u32 D00 = 0;
    u32 D01 = 0;
    u32 D02 = 0;
    u32 D03 = 0;
    u32 D04 = 0;
    u32 D05 = 0;
    u32 D06 = 0;
    u32 D07 = 0;
    u32 D08 = 0;
    u32 D09 = 0;
    u32 D10 = 0;
    u32 D11 = 0;
    u32 D12 = 0;
    u32 D13 = 0;
    u32 D14 = 0;
    u32 D15 = 0;
    u32 D16 = 0;
    u32 D17 = 0;
    u32 D18 = 0;
    u32 D19 = 0;
    u32 D20 = 0;
    u32 D21 = 0;
    u32 D22 = 0;
    u32 D23 = 0;
    u32 D24 = 0;
    u32 D25 = 0;
    u32 D26 = 0;
    u32 D27 = 0;
    u32 D28 = 0;
    u32 D29 = 0;
    u32 D30 = 0;
    u32 D31 = 0;
    u32 D32 = 0;
    u32 D33 = 0;
    u32 D34 = 0;
    u32 D35 = 0;
    u32 D36 = 0;
    u32 D37 = 0;
    u32 D38 = 0;
    u32 D39 = 0;
    u32 D40 = 0;
    u32 D41 = 0;
    u32 D42 = 0;
    u32 D43 = 0;
    u32 D44 = 0;
    u32 D45 = 0;
    u32 D46 = 0;
    u32 D47 = 0;
    u32 D48 = 0;
    u32 D49 = 0;
    u32 D50 = 0;
    u32 D51 = 0;
    u32 D52 = 0;
    u32 D53 = 0;
    u32 D54 = 0;
    u32 D55 = 0;
    u32 D56 = 0;
    u32 D57 = 0;
    u32 D58 = 0;
    u32 D59 = 0;
    u32 D60 = 0;
    u32 D61 = 0;
    u32 D62 = 0;
    u32 D63 = 0;

    // lanes 28..55 are passed straight from the K macros (bits of w1 only)
    DESCrypt
    (
      salt,
      k00, k01, k02, k03, k04, k05, k06,
      k07, k08, k09, k10, k11, k12, k13,
      k14, k15, k16, k17, k18, k19, k20,
      k21, k22, k23, k24, k25, k26, k27,
      K28, K29, K30, K31, K32, K33, K34,
      K35, K36, K37, K38, K39, K40, K41,
      K42, K43, K44, K45, K46, K47, K48,
      K49, K50, K51, K52, K53, K54, K55,
      &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
      &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
      &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
      &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
      &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
      &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
      &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
      &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
    );

    // gather the planes into an array so they can be indexed in the loops below
    u32 out[64];

    out[ 0] = D00;
    out[ 1] = D01;
    out[ 2] = D02;
    out[ 3] = D03;
    out[ 4] = D04;
    out[ 5] = D05;
    out[ 6] = D06;
    out[ 7] = D07;
    out[ 8] = D08;
    out[ 9] = D09;
    out[10] = D10;
    out[11] = D11;
    out[12] = D12;
    out[13] = D13;
    out[14] = D14;
    out[15] = D15;
    out[16] = D16;
    out[17] = D17;
    out[18] = D18;
    out[19] = D19;
    out[20] = D20;
    out[21] = D21;
    out[22] = D22;
    out[23] = D23;
    out[24] = D24;
    out[25] = D25;
    out[26] = D26;
    out[27] = D27;
    out[28] = D28;
    out[29] = D29;
    out[30] = D30;
    out[31] = D31;
    out[32] = D32;
    out[33] = D33;
    out[34] = D34;
    out[35] = D35;
    out[36] = D36;
    out[37] = D37;
    out[38] = D38;
    out[39] = D39;
    out[40] = D40;
    out[41] = D41;
    out[42] = D42;
    out[43] = D43;
    out[44] = D44;
    out[45] = D45;
    out[46] = D46;
    out[47] = D47;
    out[48] = D48;
    out[49] = D49;
    out[50] = D50;
    out[51] = D51;
    out[52] = D52;
    out[53] = D53;
    out[54] = D54;
    out[55] = D55;
    out[56] = D56;
    out[57] = D57;
    out[58] = D58;
    out[59] = D59;
    out[60] = D60;
    out[61] = D61;
    out[62] = D62;
    out[63] = D63;

    if (DIGESTS_CNT < 16)
    {
      // few digests: test each one directly against the bit planes
      for (u32 d = 0; d < DIGESTS_CNT; d++)
      {
        const u32 final_hash_pos = DIGESTS_OFFSET_HOST + d;

        if (hashes_shown[final_hash_pos]) continue;

        u32 search[2];

        search[0] = digests_buf[final_hash_pos].digest_buf[DGST_R0];
        search[1] = digests_buf[final_hash_pos].digest_buf[DGST_R1];

        // a bit of tmpResult stays 0 only if, at that bit position, all 64
        // planes agree with the corresponding bits of search[0] / search[1]
        u32 tmpResult = 0;

        // unroll hint gated on _unroll, like the loop in m01500_tm
        #ifdef _unroll
        #pragma unroll
        #endif
        for (int i = 0; i < 32; i++)
        {
          // broadcast bit i of the target words to all-ones / zero
          const u32 b0 = -((search[0] >> i) & 1);
          const u32 b1 = -((search[1] >> i) & 1);

          tmpResult |= out[ 0 + i] ^ b0;
          tmpResult |= out[32 + i] ^ b1;
        }

        // no zero bit left: nothing in this group matches this digest
        if (tmpResult == 0xffffffff) continue;

        // NOTE(review): ffz presumably returns the index of the lowest zero bit - confirm
        const u32 slice = ffz (tmpResult);

        const u32 r0 = search[0];
        const u32 r1 = search[1];
        #ifdef KERNEL_STATIC
        const u32 r2 = 0;
        const u32 r3 = 0;
        #endif

        #include COMPARE_M
      }
    }
    else
    {
      // many digests: rebuild one (r0, r1) pair per bit position and let
      // COMPARE_M handle each of them
      u32 out0[32];
      u32 out1[32];

      // unroll hint gated on _unroll, like the loop in m01500_tm
      #ifdef _unroll
      #pragma unroll
      #endif
      for (int i = 0; i < 32; i++)
      {
        // planes are fed to the transpose in reversed order
        out0[i] = out[ 0 + 31 - i];
        out1[i] = out[32 + 31 - i];
      }

      transpose32c (out0);
      transpose32c (out1);

      // unroll hint gated on _unroll, like the loop in m01500_tm
      #ifdef _unroll
      #pragma unroll
      #endif
      for (int slice = 0; slice < 32; slice++)
      {
        // the transposed words are read back in reversed order as well
        const u32 r0 = out0[31 - slice];
        const u32 r1 = out1[31 - slice];
        #ifdef KERNEL_STATIC
        const u32 r2 = 0;
        const u32 r3 = 0;
        #endif

        #include COMPARE_M
      }
    }
  }
}

#else

/**
 * Single-digest bitslice kernel (the variant compiled when DESCRYPT_SALT is defined).
 *
 * The target digest_buf[0..1] of digests_buf[0] is expanded once into 64
 * all-ones/zero masks S00..S63. For every step of the inner loop (il_pos
 * advances by 32, one position per bit of a u32) the key lanes are assembled,
 * DESCrypt is run and the 64 resulting planes are compared against the masks
 * in four stages of 16 planes, leaving the iteration as soon as no bit
 * position can match any more.
 */
KERNEL_FQ void m01500_sxx (KERN_ATTR_BITSLICE ())
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  if (gid >= GID_CNT) return;

  /**
   * salt
   */

  const u32 salt = salt_bufs[SALT_POS_HOST].salt_buf[0];

  /**
   * digest
   */

  const u32 s0 = digests_buf[0].digest_buf[0];
  const u32 s1 = digests_buf[0].digest_buf[1];

  // Snn: bit nn of the target broadcast to a full word (0xffffffff or 0);
  // negating the isolated bit in unsigned arithmetic yields exactly that
  const u32 S00 = -((s0 >>  0) & 1);
  const u32 S01 = -((s0 >>  1) & 1);
  const u32 S02 = -((s0 >>  2) & 1);
  const u32 S03 = -((s0 >>  3) & 1);
  const u32 S04 = -((s0 >>  4) & 1);
  const u32 S05 = -((s0 >>  5) & 1);
  const u32 S06 = -((s0 >>  6) & 1);
  const u32 S07 = -((s0 >>  7) & 1);
  const u32 S08 = -((s0 >>  8) & 1);
  const u32 S09 = -((s0 >>  9) & 1);
  const u32 S10 = -((s0 >> 10) & 1);
  const u32 S11 = -((s0 >> 11) & 1);
  const u32 S12 = -((s0 >> 12) & 1);
  const u32 S13 = -((s0 >> 13) & 1);
  const u32 S14 = -((s0 >> 14) & 1);
  const u32 S15 = -((s0 >> 15) & 1);
  const u32 S16 = -((s0 >> 16) & 1);
  const u32 S17 = -((s0 >> 17) & 1);
  const u32 S18 = -((s0 >> 18) & 1);
  const u32 S19 = -((s0 >> 19) & 1);
  const u32 S20 = -((s0 >> 20) & 1);
  const u32 S21 = -((s0 >> 21) & 1);
  const u32 S22 = -((s0 >> 22) & 1);
  const u32 S23 = -((s0 >> 23) & 1);
  const u32 S24 = -((s0 >> 24) & 1);
  const u32 S25 = -((s0 >> 25) & 1);
  const u32 S26 = -((s0 >> 26) & 1);
  const u32 S27 = -((s0 >> 27) & 1);
  const u32 S28 = -((s0 >> 28) & 1);
  const u32 S29 = -((s0 >> 29) & 1);
  const u32 S30 = -((s0 >> 30) & 1);
  const u32 S31 = -((s0 >> 31) & 1);
  const u32 S32 = -((s1 >>  0) & 1);
  const u32 S33 = -((s1 >>  1) & 1);
  const u32 S34 = -((s1 >>  2) & 1);
  const u32 S35 = -((s1 >>  3) & 1);
  const u32 S36 = -((s1 >>  4) & 1);
  const u32 S37 = -((s1 >>  5) & 1);
  const u32 S38 = -((s1 >>  6) & 1);
  const u32 S39 = -((s1 >>  7) & 1);
  const u32 S40 = -((s1 >>  8) & 1);
  const u32 S41 = -((s1 >>  9) & 1);
  const u32 S42 = -((s1 >> 10) & 1);
  const u32 S43 = -((s1 >> 11) & 1);
  const u32 S44 = -((s1 >> 12) & 1);
  const u32 S45 = -((s1 >> 13) & 1);
  const u32 S46 = -((s1 >> 14) & 1);
  const u32 S47 = -((s1 >> 15) & 1);
  const u32 S48 = -((s1 >> 16) & 1);
  const u32 S49 = -((s1 >> 17) & 1);
  const u32 S50 = -((s1 >> 18) & 1);
  const u32 S51 = -((s1 >> 19) & 1);
  const u32 S52 = -((s1 >> 20) & 1);
  const u32 S53 = -((s1 >> 21) & 1);
  const u32 S54 = -((s1 >> 22) & 1);
  const u32 S55 = -((s1 >> 23) & 1);
  const u32 S56 = -((s1 >> 24) & 1);
  const u32 S57 = -((s1 >> 25) & 1);
  const u32 S58 = -((s1 >> 26) & 1);
  const u32 S59 = -((s1 >> 27) & 1);
  const u32 S60 = -((s1 >> 28) & 1);
  const u32 S61 = -((s1 >> 29) & 1);
  const u32 S62 = -((s1 >> 30) & 1);
  const u32 S63 = -((s1 >> 31) & 1);

  /**
   * base
   */

  const u32 w0 = pws[gid].i[0];
  const u32 w1 = pws[gid].i[1];

  // every byte shifted left by one, bit 0 of each byte cleared
  const u32 w0s = (w0 << 1) & 0xfefefefe;
  const u32 w1s = (w1 << 1) & 0xfefefefe;

  // K00..K55: one key lane per candidate bit (-1 if set, 0 otherwise), bits
  // taken from position 7 down to 1 of every byte. Kept as macros because
  // they stay defined after this function.
  #define K00 (((w0s >> ( 0 + 7)) & 1) ? -1 : 0)
  #define K01 (((w0s >> ( 0 + 6)) & 1) ? -1 : 0)
  #define K02 (((w0s >> ( 0 + 5)) & 1) ? -1 : 0)
  #define K03 (((w0s >> ( 0 + 4)) & 1) ? -1 : 0)
  #define K04 (((w0s >> ( 0 + 3)) & 1) ? -1 : 0)
  #define K05 (((w0s >> ( 0 + 2)) & 1) ? -1 : 0)
  #define K06 (((w0s >> ( 0 + 1)) & 1) ? -1 : 0)
  #define K07 (((w0s >> ( 8 + 7)) & 1) ? -1 : 0)
  #define K08 (((w0s >> ( 8 + 6)) & 1) ? -1 : 0)
  #define K09 (((w0s >> ( 8 + 5)) & 1) ? -1 : 0)
  #define K10 (((w0s >> ( 8 + 4)) & 1) ? -1 : 0)
  #define K11 (((w0s >> ( 8 + 3)) & 1) ? -1 : 0)
  #define K12 (((w0s >> ( 8 + 2)) & 1) ? -1 : 0)
  #define K13 (((w0s >> ( 8 + 1)) & 1) ? -1 : 0)
  #define K14 (((w0s >> (16 + 7)) & 1) ? -1 : 0)
  #define K15 (((w0s >> (16 + 6)) & 1) ? -1 : 0)
  #define K16 (((w0s >> (16 + 5)) & 1) ? -1 : 0)
  #define K17 (((w0s >> (16 + 4)) & 1) ? -1 : 0)
  #define K18 (((w0s >> (16 + 3)) & 1) ? -1 : 0)
  #define K19 (((w0s >> (16 + 2)) & 1) ? -1 : 0)
  #define K20 (((w0s >> (16 + 1)) & 1) ? -1 : 0)
  #define K21 (((w0s >> (24 + 7)) & 1) ? -1 : 0)
  #define K22 (((w0s >> (24 + 6)) & 1) ? -1 : 0)
  #define K23 (((w0s >> (24 + 5)) & 1) ? -1 : 0)
  #define K24 (((w0s >> (24 + 4)) & 1) ? -1 : 0)
  #define K25 (((w0s >> (24 + 3)) & 1) ? -1 : 0)
  #define K26 (((w0s >> (24 + 2)) & 1) ? -1 : 0)
  #define K27 (((w0s >> (24 + 1)) & 1) ? -1 : 0)
  #define K28 (((w1s >> ( 0 + 7)) & 1) ? -1 : 0)
  #define K29 (((w1s >> ( 0 + 6)) & 1) ? -1 : 0)
  #define K30 (((w1s >> ( 0 + 5)) & 1) ? -1 : 0)
  #define K31 (((w1s >> ( 0 + 4)) & 1) ? -1 : 0)
  #define K32 (((w1s >> ( 0 + 3)) & 1) ? -1 : 0)
  #define K33 (((w1s >> ( 0 + 2)) & 1) ? -1 : 0)
  #define K34 (((w1s >> ( 0 + 1)) & 1) ? -1 : 0)
  #define K35 (((w1s >> ( 8 + 7)) & 1) ? -1 : 0)
  #define K36 (((w1s >> ( 8 + 6)) & 1) ? -1 : 0)
  #define K37 (((w1s >> ( 8 + 5)) & 1) ? -1 : 0)
  #define K38 (((w1s >> ( 8 + 4)) & 1) ? -1 : 0)
  #define K39 (((w1s >> ( 8 + 3)) & 1) ? -1 : 0)
  #define K40 (((w1s >> ( 8 + 2)) & 1) ? -1 : 0)
  #define K41 (((w1s >> ( 8 + 1)) & 1) ? -1 : 0)
  #define K42 (((w1s >> (16 + 7)) & 1) ? -1 : 0)
  #define K43 (((w1s >> (16 + 6)) & 1) ? -1 : 0)
  #define K44 (((w1s >> (16 + 5)) & 1) ? -1 : 0)
  #define K45 (((w1s >> (16 + 4)) & 1) ? -1 : 0)
  #define K46 (((w1s >> (16 + 3)) & 1) ? -1 : 0)
  #define K47 (((w1s >> (16 + 2)) & 1) ? -1 : 0)
  #define K48 (((w1s >> (16 + 1)) & 1) ? -1 : 0)
  #define K49 (((w1s >> (24 + 7)) & 1) ? -1 : 0)
  #define K50 (((w1s >> (24 + 6)) & 1) ? -1 : 0)
  #define K51 (((w1s >> (24 + 5)) & 1) ? -1 : 0)
  #define K52 (((w1s >> (24 + 4)) & 1) ? -1 : 0)
  #define K53 (((w1s >> (24 + 3)) & 1) ? -1 : 0)
  #define K54 (((w1s >> (24 + 2)) & 1) ? -1 : 0)
  #define K55 (((w1s >> (24 + 1)) & 1) ? -1 : 0)

  /**
   * inner loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += 32)
  {
    // one words_buf_s entry per group of 32 inner-loop positions
    const u32 pc_pos = il_pos / 32;

    // lanes 0..27: candidate bit of w0 merged with the stored bit plane
    const u32 k00 = K00 | words_buf_s[pc_pos].b[ 0];
    const u32 k01 = K01 | words_buf_s[pc_pos].b[ 1];
    const u32 k02 = K02 | words_buf_s[pc_pos].b[ 2];
    const u32 k03 = K03 | words_buf_s[pc_pos].b[ 3];
    const u32 k04 = K04 | words_buf_s[pc_pos].b[ 4];
    const u32 k05 = K05 | words_buf_s[pc_pos].b[ 5];
    const u32 k06 = K06 | words_buf_s[pc_pos].b[ 6];
    const u32 k07 = K07 | words_buf_s[pc_pos].b[ 7];
    const u32 k08 = K08 | words_buf_s[pc_pos].b[ 8];
    const u32 k09 = K09 | words_buf_s[pc_pos].b[ 9];
    const u32 k10 = K10 | words_buf_s[pc_pos].b[10];
    const u32 k11 = K11 | words_buf_s[pc_pos].b[11];
    const u32 k12 = K12 | words_buf_s[pc_pos].b[12];
    const u32 k13 = K13 | words_buf_s[pc_pos].b[13];
    const u32 k14 = K14 | words_buf_s[pc_pos].b[14];
    const u32 k15 = K15 | words_buf_s[pc_pos].b[15];
    const u32 k16 = K16 | words_buf_s[pc_pos].b[16];
    const u32 k17 = K17 | words_buf_s[pc_pos].b[17];
    const u32 k18 = K18 | words_buf_s[pc_pos].b[18];
    const u32 k19 = K19 | words_buf_s[pc_pos].b[19];
    const u32 k20 = K20 | words_buf_s[pc_pos].b[20];
    const u32 k21 = K21 | words_buf_s[pc_pos].b[21];
    const u32 k22 = K22 | words_buf_s[pc_pos].b[22];
    const u32 k23 = K23 | words_buf_s[pc_pos].b[23];
    const u32 k24 = K24 | words_buf_s[pc_pos].b[24];
    const u32 k25 = K25 | words_buf_s[pc_pos].b[25];
    const u32 k26 = K26 | words_buf_s[pc_pos].b[26];
    const u32 k27 = K27 | words_buf_s[pc_pos].b[27];

    // 64 data planes, all starting at zero
    u32 D00 = 0, D01 = 0, D02 = 0, D03 = 0, D04 = 0, D05 = 0, D06 = 0, D07 = 0;
    u32 D08 = 0, D09 = 0, D10 = 0, D11 = 0, D12 = 0, D13 = 0, D14 = 0, D15 = 0;
    u32 D16 = 0, D17 = 0, D18 = 0, D19 = 0, D20 = 0, D21 = 0, D22 = 0, D23 = 0;
    u32 D24 = 0, D25 = 0, D26 = 0, D27 = 0, D28 = 0, D29 = 0, D30 = 0, D31 = 0;
    u32 D32 = 0, D33 = 0, D34 = 0, D35 = 0, D36 = 0, D37 = 0, D38 = 0, D39 = 0;
    u32 D40 = 0, D41 = 0, D42 = 0, D43 = 0, D44 = 0, D45 = 0, D46 = 0, D47 = 0;
    u32 D48 = 0, D49 = 0, D50 = 0, D51 = 0, D52 = 0, D53 = 0, D54 = 0, D55 = 0;
    u32 D56 = 0, D57 = 0, D58 = 0, D59 = 0, D60 = 0, D61 = 0, D62 = 0, D63 = 0;

    // lanes 28..55 come straight from the K macros (bits of w1 only)
    DESCrypt
    (
      salt,
      k00, k01, k02, k03, k04, k05, k06,
      k07, k08, k09, k10, k11, k12, k13,
      k14, k15, k16, k17, k18, k19, k20,
      k21, k22, k23, k24, k25, k26, k27,
      K28, K29, K30, K31, K32, K33, K34,
      K35, K36, K37, K38, K39, K40, K41,
      K42, K43, K44, K45, K46, K47, K48,
      K49, K50, K51, K52, K53, K54, K55,
      &D00, &D01, &D02, &D03, &D04, &D05, &D06, &D07,
      &D08, &D09, &D10, &D11, &D12, &D13, &D14, &D15,
      &D16, &D17, &D18, &D19, &D20, &D21, &D22, &D23,
      &D24, &D25, &D26, &D27, &D28, &D29, &D30, &D31,
      &D32, &D33, &D34, &D35, &D36, &D37, &D38, &D39,
      &D40, &D41, &D42, &D43, &D44, &D45, &D46, &D47,
      &D48, &D49, &D50, &D51, &D52, &D53, &D54, &D55,
      &D56, &D57, &D58, &D59, &D60, &D61, &D62, &D63
    );

    // A bit of tmpResult stays 0 only while every plane checked so far agrees
    // with the target at that bit position. After each stage of 16 planes the
    // iteration is abandoned if no position is left.

    // stage 1: planes 0..15
    u32 tmpResult = (D00 ^ S00) | (D01 ^ S01) | (D02 ^ S02) | (D03 ^ S03)
                  | (D04 ^ S04) | (D05 ^ S05) | (D06 ^ S06) | (D07 ^ S07)
                  | (D08 ^ S08) | (D09 ^ S09) | (D10 ^ S10) | (D11 ^ S11)
                  | (D12 ^ S12) | (D13 ^ S13) | (D14 ^ S14) | (D15 ^ S15);

    if (tmpResult == 0xffffffff) continue;

    // stage 2: planes 16..31
    tmpResult |= (D16 ^ S16) | (D17 ^ S17) | (D18 ^ S18) | (D19 ^ S19)
              |  (D20 ^ S20) | (D21 ^ S21) | (D22 ^ S22) | (D23 ^ S23)
              |  (D24 ^ S24) | (D25 ^ S25) | (D26 ^ S26) | (D27 ^ S27)
              |  (D28 ^ S28) | (D29 ^ S29) | (D30 ^ S30) | (D31 ^ S31);

    if (tmpResult == 0xffffffff) continue;

    // stage 3: planes 32..47
    tmpResult |= (D32 ^ S32) | (D33 ^ S33) | (D34 ^ S34) | (D35 ^ S35)
              |  (D36 ^ S36) | (D37 ^ S37) | (D38 ^ S38) | (D39 ^ S39)
              |  (D40 ^ S40) | (D41 ^ S41) | (D42 ^ S42) | (D43 ^ S43)
              |  (D44 ^ S44) | (D45 ^ S45) | (D46 ^ S46) | (D47 ^ S47);

    if (tmpResult == 0xffffffff) continue;

    // stage 4: planes 48..63
    tmpResult |= (D48 ^ S48) | (D49 ^ S49) | (D50 ^ S50) | (D51 ^ S51)
              |  (D52 ^ S52) | (D53 ^ S53) | (D54 ^ S54) | (D55 ^ S55)
              |  (D56 ^ S56) | (D57 ^ S57) | (D58 ^ S58) | (D59 ^ S59)
              |  (D60 ^ S60) | (D61 ^ S61) | (D62 ^ S62) | (D63 ^ S63);

    if (tmpResult == 0xffffffff) continue;

    // NOTE(review): ffz presumably returns the index of the lowest zero bit - confirm
    const u32 slice = ffz (tmpResult);

    #ifdef KERNEL_STATIC
    #include COMPARE_S
    #endif
  }
}

// Empty stub: this definition is the one compiled when DESCRYPT_SALT is
// defined. The m01500_mxx with a real body is in the #ifndef DESCRYPT_SALT
// branch above.
KERNEL_FQ void m01500_mxx (KERN_ATTR_BITSLICE ())
{
}

#endif
